break; case HASH: processingMode = this.new ProcessingModeHashAggregate(); break; case MERGE_PARTIAL:
/** * Locates the aggregation buffer sets to use for each key in the current batch. * The keyWrappersBatch must have evaluated the current batch first. */ private void prepareBatchAggregationBufferSets(VectorizedRowBatch batch) throws HiveException { // The aggregation batch vector needs to know when we start a new batch // to bump its internal version. aggregationBatchInfo.startBatch(); // We now have to probe the global hash and find-or-allocate // the aggregation buffers to use for each key present in the batch VectorHashKeyWrapper[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers(); for (int i=0; i < batch.size; ++i) { VectorHashKeyWrapper kw = keyWrappers[i]; VectorAggregationBufferRow aggregationBuffer = mapKeysAggregationBuffers.get(kw); if (null == aggregationBuffer) { // the probe failed, we must allocate a set of aggregation buffers // and push the (keywrapper,buffers) pair into the hash. // is very important to clone the keywrapper, the one we have from our // keyWrappersBatch is going to be reset/reused on next batch. aggregationBuffer = allocateAggregationBuffer(); mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer); numEntriesHashTable++; numEntriesSinceCheck++; } aggregationBatchInfo.mapAggregationBufferSet(aggregationBuffer, i); } }
prepareBatchAggregationBufferSets(batch); processAggregators(batch); while (shouldFlush(batch)) { flush(false); updateAvgVariableSize(batch); checkHashModeEfficiency();
} else { processingMode = this.new ProcessingModeHashAggregate();
prepareBatchAggregationBufferSets(batch); processAggregators(batch); while (shouldFlush(batch)) { flush(false); updateAvgVariableSize(batch); checkHashModeEfficiency();
computeMemoryLimits(); LOG.info("using hash aggregation processing mode");
/**
 * Locates the aggregation buffer sets to use for each key in the current batch.
 * The keyWrappersBatch must have evaluated the current batch first.
 *
 * @param batch the vectorized row batch whose keys are probed against the hash table
 * @throws HiveException if allocating a new aggregation buffer set fails
 */
private void prepareBatchAggregationBufferSets(VectorizedRowBatch batch) throws HiveException {
  // The aggregation batch vector needs to know when we start a new batch
  // to bump its internal version.
  aggregationBatchInfo.startBatch();

  // We now have to probe the global hash and find-or-allocate
  // the aggregation buffers to use for each key present in the batch
  VectorHashKeyWrapper[] keyWrappers = keyWrappersBatch.getVectorHashKeyWrappers();
  for (int i=0; i < batch.size; ++i) {
    VectorHashKeyWrapper kw = keyWrappers[i];
    VectorAggregationBufferRow aggregationBuffer = mapKeysAggregationBuffers.get(kw);
    if (null == aggregationBuffer) {
      // the probe failed, we must allocate a set of aggregation buffers
      // and push the (keywrapper,buffers) pair into the hash.
      // is very important to clone the keywrapper, the one we have from our
      // keyWrappersBatch is going to be reset/reused on next batch.
      aggregationBuffer = allocateAggregationBuffer();
      mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer);
      // Growth counters consumed by shouldFlush(): total entries and entries
      // added since the last variable-size sampling.
      numEntriesHashTable++;
      numEntriesSinceCheck++;
    }
    aggregationBatchInfo.mapAggregationBufferSet(aggregationBuffer, i);
  }
}
/** * Returns true if the memory threshold for the hash table was reached. */ private boolean shouldFlush(VectorizedRowBatch batch) { if (batch.size == 0) { return false; } //numEntriesSinceCheck is the number of entries added to the hash table // since the last time we checked the average variable size if (numEntriesSinceCheck >= this.checkInterval) { // Were going to update the average variable row size by sampling the current batch updateAvgVariableSize(batch); numEntriesSinceCheck = 0; } if (numEntriesHashTable > this.maxHtEntries || numEntriesHashTable * (fixedHashEntrySize + avgVariableSize) > maxHashTblMemory) { return true; } if (gcCanary.get() == null) { return true; } return false; }
/**
 * Checks if the HT reduces the number of entries by at least minReductionHashAggr factor.
 * If not, flushes everything and switches to streaming mode.
 *
 * @throws HiveException if the flush or mode change fails
 */
private void checkHashModeEfficiency() throws HiveException {
  // Only re-check after enough rows have accumulated since the last check.
  if (lastModeCheckRowCount > numRowsCompareHashAggr) {
    lastModeCheckRowCount = 0;
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d",
          numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * minReductionHashAggr)));
    }
    // Hash aggregation is not reducing the row count enough to pay for the
    // hash table: flush all buffered state and fall back to streaming.
    // NOTE(review): a sibling version of this method in this chunk calls
    // changeToStreamingMode() instead — confirm which symbol the class declares.
    if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) {
      flush(true);
      changeToUnsortedStreamingMode();
    }
  }
}
// Closes the enclosing class — its declaration is outside this chunk's view.
}
/**
 * Flushes all remaining aggregation state on a normal close; an aborted
 * close discards it.
 *
 * @param aborted true when the operator is shutting down due to failure
 * @throws HiveException if the final flush fails
 */
@Override
public void close(boolean aborted) throws HiveException {
  if (aborted) {
    return;
  }
  flush(true);
}
break; case HASH: processingMode = this.new ProcessingModeHashAggregate(); break; case MERGE_PARTIAL:
computeMemoryLimits(); LOG.debug("using hash aggregation processing mode");
prepareBatchAggregationBufferSets(batch); processAggregators(batch); while (shouldFlush(batch)) { flush(false); updateAvgVariableSize(batch); checkHashModeEfficiency();
computeMemoryLimits(); LOG.debug("using hash aggregation processing mode");
aggregationBuffer = allocateAggregationBuffer(); mapKeysAggregationBuffers.put(kw.copyKey(), aggregationBuffer); numEntriesHashTable++;
/**
 * Returns true if the memory threshold for the hash table was reached.
 *
 * @param batch the current batch, used to sample the average variable row size
 * @return true when the hash table should be (partially) flushed
 */
private boolean shouldFlush(VectorizedRowBatch batch) {
  if (batch.size == 0) {
    return false;
  }
  // numEntriesSinceCheck is the number of entries added to the hash table
  // since the last time we checked the average variable size
  if (numEntriesSinceCheck >= this.checkInterval) {
    // We're going to update the average variable row size by sampling the current batch
    updateAvgVariableSize(batch);
    numEntriesSinceCheck = 0;
  }
  // Flush when either the entry-count cap or the estimated memory budget
  // (entries * (fixed + avg variable size)) is exceeded.
  if (numEntriesHashTable > this.maxHtEntries ||
      numEntriesHashTable * (fixedHashEntrySize + avgVariableSize) > maxHashTblMemory) {
    return true;
  }
  // A cleared canary reference indicates GC pressure — flush in that case too.
  if (gcCanary.get() == null) {
    return true;
  }
  return false;
}
/**
 * Returns true if the memory threshold for the hash table was reached.
 *
 * @param batch the current batch, used to sample the average variable row size
 * @return true when the hash table should be (partially) flushed
 */
private boolean shouldFlush(VectorizedRowBatch batch) {
  if (batch.size == 0) {
    return false;
  }
  // numEntriesSinceCheck is the number of entries added to the hash table
  // since the last time we checked the average variable size
  if (numEntriesSinceCheck >= this.checkInterval) {
    // We're going to update the average variable row size by sampling the current batch
    updateAvgVariableSize(batch);
    numEntriesSinceCheck = 0;
  }
  // Flush when either the entry-count cap or the estimated memory budget
  // (entries * (fixed + avg variable size)) is exceeded.
  if (numEntriesHashTable > this.maxHtEntries ||
      numEntriesHashTable * (fixedHashEntrySize + avgVariableSize) > maxHashTblMemory) {
    return true;
  }
  // A cleared canary reference indicates GC pressure — flush in that case too.
  if (gcCanary.get() == null) {
    return true;
  }
  return false;
}
/**
 * Checks if the HT reduces the number of entries by at least minReductionHashAggr factor.
 * If not, flushes everything and switches to streaming mode.
 *
 * @throws HiveException if the flush or mode change fails
 */
private void checkHashModeEfficiency() throws HiveException {
  // Only re-check after enough rows have accumulated since the last check.
  if (lastModeCheckRowCount > numRowsCompareHashAggr) {
    lastModeCheckRowCount = 0;
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d",
          numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * minReductionHashAggr)));
    }
    // Hash aggregation is not reducing the row count enough to pay for the
    // hash table: flush all buffered state and fall back to streaming.
    if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) {
      flush(true);
      changeToStreamingMode();
    }
  }
}
// Closes the enclosing class — its declaration is outside this chunk's view.
}
/**
 * Flushes all remaining aggregation state on a normal close, and emits the
 * grouping-set "summary row" when one is configured but no output was produced.
 *
 * @param aborted true when the operator is shutting down due to failure
 * @throws HiveException if the flush or the summary-row write fails
 */
@Override
public void close(boolean aborted) throws HiveException {
  if (!aborted) {
    flush(true);
  }
  if (!aborted && sumBatchSize == 0 && GroupByOperator.shouldEmitSummaryRow(conf)) {
    // in case the empty grouping set is preset; but no output has done
    // the "summary row" still needs to be emitted
    VectorHashKeyWrapperBase kw = keyWrappersBatch.getVectorHashKeyWrappers()[0];
    kw.setNull();
    int pos = conf.getGroupingSetPosition();
    if (pos >= 0) {
      // NOTE(review): (1L << pos) - 1 sets all grouping-set bits below `pos` —
      // presumably marking every grouping key as aggregated; confirm against
      // GroupByOperator's grouping-set encoding.
      long val = (1L << pos) - 1;
      keyWrappersBatch.setLongValue(kw, pos, val);
    }
    // Empty (freshly allocated) aggregators yield the aggregate identity values.
    VectorAggregationBufferRow groupAggregators = allocateAggregationBuffer();
    writeSingleRow(kw, groupAggregators);
  }
}
/**
 * Checks if the HT reduces the number of entries by at least minReductionHashAggr factor.
 * If not, flushes everything and switches to streaming mode.
 *
 * @throws HiveException if the flush or mode change fails
 */
private void checkHashModeEfficiency() throws HiveException {
  // Only re-check after enough rows have accumulated since the last check.
  if (lastModeCheckRowCount > numRowsCompareHashAggr) {
    lastModeCheckRowCount = 0;
    if (LOG.isDebugEnabled()) {
      LOG.debug(String.format("checkHashModeEfficiency: HT:%d RC:%d MIN:%d",
          numEntriesHashTable, sumBatchSize, (long)(sumBatchSize * minReductionHashAggr)));
    }
    // Hash aggregation is not reducing the row count enough to pay for the
    // hash table: flush all buffered state and fall back to streaming.
    if (numEntriesHashTable > sumBatchSize * minReductionHashAggr) {
      flush(true);
      changeToStreamingMode();
    }
  }
}
// Closes the enclosing class — its declaration is outside this chunk's view.
}