/** * Evaluates the aggregators on the current batch. * The aggregationBatchInfo must have been prepared * by calling {@link #prepareBatchAggregationBufferSets} first. */ protected void processAggregators(VectorizedRowBatch batch) throws HiveException { // We now have a vector of aggregation buffer sets to use for each row // We can start computing the aggregates. // If the number of distinct keys in the batch is 1 we can // use the optimized code path of aggregateInput VectorAggregationBufferRow[] aggregationBufferSets = aggregationBatchInfo.getAggregationBuffers(); if (aggregationBatchInfo.getDistinctBufferSetCount() == 1) { VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = aggregationBufferSets[0].getAggregationBuffers(); for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(aggregationBuffers[i], batch); } } else { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInputSelection( aggregationBufferSets, i, batch); } } }
/** * Computes the memory limits for hash table flush (spill). */ private void computeMemoryLimits() { JavaDataModel model = JavaDataModel.get(); fixedHashEntrySize = model.hashMapEntry() + keyWrappersBatch.getKeysFixedSize() + aggregationBatchInfo.getAggregatorsFixedSize(); MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); maxMemory = memoryMXBean.getHeapMemoryUsage().getMax(); memoryThreshold = conf.getMemoryThreshold(); // Tests may leave this unitialized, so better set it to 1 if (memoryThreshold == 0.0f) { memoryThreshold = 1.0f; } maxHashTblMemory = (int)(maxMemory * memoryThreshold); if (LOG.isDebugEnabled()) { LOG.debug(String.format("maxMemory:%dMb (%d * %f) fixSize:%d (key:%d agg:%d)", maxHashTblMemory/1024/1024, maxMemory/1024/1024, memoryThreshold, fixedHashEntrySize, keyWrappersBatch.getKeysFixedSize(), aggregationBatchInfo.getAggregatorsFixedSize())); } }
/** * Updates the average variable size of the hash table entries. * The average is only updates by probing the batch that added the entry in the hash table * that caused the check threshold to be reached. */ private void updateAvgVariableSize(VectorizedRowBatch batch) { int keyVariableSize = keyWrappersBatch.getVariableSize(batch.size); int aggVariableSize = aggregationBatchInfo.getVariableSize(batch.size); // This assumes the distribution of variable size keys/aggregates in the input // is the same as the distribution of variable sizes in the hash entries avgVariableSize = (int)((avgVariableSize * sumBatchSize + keyVariableSize +aggVariableSize) / (sumBatchSize + batch.size)); }
// Fragment (partial view): begin a new batch in the aggregation batch vector,
// record one more entry since the last memory check, and bind the buffer set
// to row i. `aggregationBuffer` and `i` are defined by the enclosing method,
// which is not visible in this span.
aggregationBatchInfo.startBatch();
numEntriesSinceCheck++;
aggregationBatchInfo.mapAggregationBufferSet(aggregationBuffer, i);
// Allocate the per-batch aggregation buffer bookkeeping and precompute the
// per-aggregator layout (fixed sizes, offsets) from the configured aggregators.
aggregationBatchInfo = new VectorAggregationBufferBatch();
aggregationBatchInfo.compileAggregationBatchInfo(aggregators);
/** * Locates the aggregation buffer sets to use for each key in the current batch. * The keyWrappersBatch must have evaluated the current batch first. */ private void prepareBatchAggregationBufferSets(VectorizedRowBatch batch) throws HiveException { // The aggregation batch vector needs to know when we start a new batch // to bump its internal version. aggregationBatchInfo.startBatch(); // We now have to probe the global hash and find-or-allocate // the aggregation buffers to use for each key present in the batch VectorHashKeyWrapper[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers(); for (int i=0; i < batch.size; ++i) { VectorHashKeyWrapper kw = keyWrappers[i]; VectorAggregationBufferRow aggregationBuffer = mapKeysAggregationBuffers.get(kw); if (null == aggregationBuffer) { // the probe failed, we must allocate a set of aggregation buffers // and push the (keywrapper,buffers) pair into the hash. // is very important to clone the keywrapper, the one we have from our // keyWrappersBatch is going to be reset/reused on next batch. aggregationBuffer = allocateAggregationBuffer(); mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer); numEntriesHashTable++; numEntriesSinceCheck++; } aggregationBatchInfo.mapAggregationBufferSet(aggregationBuffer, i); } }
// Allocate the per-batch aggregation buffer bookkeeping and precompute the
// per-aggregator layout from the configured aggregators.
aggregationBatchInfo = new VectorAggregationBufferBatch();
aggregationBatchInfo.compileAggregationBatchInfo(aggregators);
// Fragment (partial view): start a new batch version, then map the streaming
// aggregators' buffer set to row i (`i` comes from the enclosing method).
// NOTE(review): flushMark is incremented but never read within this span —
// looks like dead code; confirm against the full enclosing method.
aggregationBatchInfo.startBatch();
int flushMark = 0;
++flushMark;
aggregationBatchInfo.mapAggregationBufferSet(currentStreamingAggregators, i);
/** * Evaluates the aggregators on the current batch. * The aggregationBatchInfo must have been prepared * by calling {@link #prepareBatchAggregationBufferSets} first. */ protected void processAggregators(VectorizedRowBatch batch) throws HiveException { // We now have a vector of aggregation buffer sets to use for each row // We can start computing the aggregates. // If the number of distinct keys in the batch is 1 we can // use the optimized code path of aggregateInput VectorAggregationBufferRow[] aggregationBufferSets = aggregationBatchInfo.getAggregationBuffers(); if (aggregationBatchInfo.getDistinctBufferSetCount() == 1) { VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = aggregationBufferSets[0].getAggregationBuffers(); for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(aggregationBuffers[i], batch); } } else { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInputSelection( aggregationBufferSets, i, batch); } } }
// Allocate the per-batch aggregation buffer bookkeeping and precompute the
// per-aggregator layout from the configured aggregators.
aggregationBatchInfo = new VectorAggregationBufferBatch();
aggregationBatchInfo.compileAggregationBatchInfo(aggregators);
/** * Updates the average variable size of the hash table entries. * The average is only updates by probing the batch that added the entry in the hash table * that caused the check threshold to be reached. */ private void updateAvgVariableSize(VectorizedRowBatch batch) { int keyVariableSize = keyWrappersBatch.getVariableSize(batch.size); int aggVariableSize = aggregationBatchInfo.getVariableSize(batch.size); // This assumes the distribution of variable size keys/aggregates in the input // is the same as the distribution of variable sizes in the hash entries avgVariableSize = (int)((avgVariableSize * sumBatchSize + keyVariableSize +aggVariableSize) / (sumBatchSize + batch.size)); }
/** * Computes the memory limits for hash table flush (spill). */ private void computeMemoryLimits() { JavaDataModel model = JavaDataModel.get(); fixedHashEntrySize = model.hashMapEntry() + keyWrappersBatch.getKeysFixedSize() + aggregationBatchInfo.getAggregatorsFixedSize(); MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); maxMemory = isLlap ? getConf().getMaxMemoryAvailable() : memoryMXBean.getHeapMemoryUsage().getMax(); memoryThreshold = conf.getMemoryThreshold(); // Tests may leave this unitialized, so better set it to 1 if (memoryThreshold == 0.0f) { memoryThreshold = 1.0f; } maxHashTblMemory = (int)(maxMemory * memoryThreshold); if (LOG.isDebugEnabled()) { LOG.debug("GBY memory limits - isLlap: {} maxMemory: {} ({} * {}) fixSize:{} (key:{} agg:{})", isLlap, LlapUtil.humanReadableByteCount(maxHashTblMemory), LlapUtil.humanReadableByteCount(maxMemory), memoryThreshold, fixedHashEntrySize, keyWrappersBatch.getKeysFixedSize(), aggregationBatchInfo.getAggregatorsFixedSize()); } }
// Fragment (partial view): start a new batch version, then map the streaming
// aggregators' buffer set to row i (`i` comes from the enclosing method).
// NOTE(review): flushMark is incremented but never read within this span —
// looks like dead code; confirm against the full enclosing method.
aggregationBatchInfo.startBatch();
int flushMark = 0;
++flushMark;
aggregationBatchInfo.mapAggregationBufferSet(currentStreamingAggregators, i);
/** * Evaluates the aggregators on the current batch. * The aggregationBatchInfo must have been prepared * by calling {@link #prepareBatchAggregationBufferSets} first. */ protected void processAggregators(VectorizedRowBatch batch) throws HiveException { // We now have a vector of aggregation buffer sets to use for each row // We can start computing the aggregates. // If the number of distinct keys in the batch is 1 we can // use the optimized code path of aggregateInput VectorAggregationBufferRow[] aggregationBufferSets = aggregationBatchInfo.getAggregationBuffers(); if (aggregationBatchInfo.getDistinctBufferSetCount() == 1) { VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = aggregationBufferSets[0].getAggregationBuffers(); for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(aggregationBuffers[i], batch); } } else { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInputSelection( aggregationBufferSets, i, batch); } } }
/** * Updates the average variable size of the hash table entries. * The average is only updates by probing the batch that added the entry in the hash table * that caused the check threshold to be reached. */ private void updateAvgVariableSize(VectorizedRowBatch batch) { int keyVariableSize = keyWrappersBatch.getVariableSize(batch.size); int aggVariableSize = aggregationBatchInfo.getVariableSize(batch.size); // This assumes the distribution of variable size keys/aggregates in the input // is the same as the distribution of variable sizes in the hash entries avgVariableSize = (int)((avgVariableSize * sumBatchSize + keyVariableSize +aggVariableSize) / (sumBatchSize + batch.size)); }
/** * Computes the memory limits for hash table flush (spill). */ private void computeMemoryLimits() { JavaDataModel model = JavaDataModel.get(); fixedHashEntrySize = model.hashMapEntry() + keyWrappersBatch.getKeysFixedSize() + aggregationBatchInfo.getAggregatorsFixedSize(); MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); maxMemory = memoryMXBean.getHeapMemoryUsage().getMax(); memoryThreshold = conf.getMemoryThreshold(); // Tests may leave this unitialized, so better set it to 1 if (memoryThreshold == 0.0f) { memoryThreshold = 1.0f; } maxHashTblMemory = (int)(maxMemory * memoryThreshold); if (LOG.isDebugEnabled()) { LOG.debug(String.format("maxMemory:%dMb (%d * %f) fixSize:%d (key:%d agg:%d)", maxHashTblMemory/1024/1024, maxMemory/1024/1024, memoryThreshold, fixedHashEntrySize, keyWrappersBatch.getKeysFixedSize(), aggregationBatchInfo.getAggregatorsFixedSize())); } }
/** * Locates the aggregation buffer sets to use for each key in the current batch. * The keyWrappersBatch must have evaluated the current batch first. */ private void prepareBatchAggregationBufferSets(VectorizedRowBatch batch) throws HiveException { // The aggregation batch vector needs to know when we start a new batch // to bump its internal version. aggregationBatchInfo.startBatch(); // We now have to probe the global hash and find-or-allocate // the aggregation buffers to use for each key present in the batch VectorHashKeyWrapper[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers(); for (int i=0; i < batch.size; ++i) { VectorHashKeyWrapper kw = keyWrappers[i]; VectorAggregationBufferRow aggregationBuffer = mapKeysAggregationBuffers.get(kw); if (null == aggregationBuffer) { // the probe failed, we must allocate a set of aggregation buffers // and push the (keywrapper,buffers) pair into the hash. // is very important to clone the keywrapper, the one we have from our // keyWrappersBatch is going to be reset/reused on next batch. aggregationBuffer = allocateAggregationBuffer(); mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer); numEntriesHashTable++; numEntriesSinceCheck++; } aggregationBatchInfo.mapAggregationBufferSet(aggregationBuffer, i); } }
// Fragment (partial view): start a new batch version, then map the streaming
// aggregators' buffer set to row i (`i` comes from the enclosing method).
// NOTE(review): flushMark is incremented but never read within this span —
// looks like dead code; confirm against the full enclosing method.
aggregationBatchInfo.startBatch();
int flushMark = 0;
++flushMark;
aggregationBatchInfo.mapAggregationBufferSet(currentStreamingAggregators, i);