/** * Evaluates the aggregators on the current batch. * The aggregationBatchInfo must have been prepared * by calling {@link #prepareBatchAggregationBufferSets} first. */ protected void processAggregators(VectorizedRowBatch batch) throws HiveException { // We now have a vector of aggregation buffer sets to use for each row // We can start computing the aggregates. // If the number of distinct keys in the batch is 1 we can // use the optimized code path of aggregateInput VectorAggregationBufferRow[] aggregationBufferSets = aggregationBatchInfo.getAggregationBuffers(); if (aggregationBatchInfo.getDistinctBufferSetCount() == 1) { VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = aggregationBufferSets[0].getAggregationBuffers(); for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(aggregationBuffers[i], batch); } } else { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInputSelection( aggregationBufferSets, i, batch); } } }
/**
 * Allocates a new aggregation buffer set: one freshly reset buffer
 * per configured aggregator, wrapped in a {@link VectorAggregationBufferRow}.
 */
protected VectorAggregationBufferRow allocateAggregationBuffer() throws HiveException {
  final int count = aggregators.length;
  VectorAggregateExpression.AggregationBuffer[] buffers =
      new VectorAggregateExpression.AggregationBuffer[count];
  for (int index = 0; index < count; ++index) {
    VectorAggregateExpression.AggregationBuffer buf =
        aggregators[index].getNewAggregationBuffer();
    aggregators[index].reset(buf);
    buffers[index] = buf;
  }
  return new VectorAggregationBufferRow(buffers);
}
/**
 * Precomputes per-batch aggregation bookkeeping: the fixed memory footprint of
 * one aggregation buffer set, and the indexes of aggregators whose buffers
 * have a variable size (tracked separately for memory estimation).
 */
public void compileAggregationBatchInfo(VectorAggregateExpression[] aggregators) {
  JavaDataModel model = JavaDataModel.get();
  int[] variableIndexes = new int[aggregators.length];
  int variableCount = 0;
  // Fixed overhead of the buffer-set row itself: object header, two
  // primitives and one reference, aligned, plus the buffer array.
  aggregatorsFixedSize = JavaDataModel.alignUp(
      model.object() + model.primitive1() * 2 + model.ref(),
      model.memoryAlign());
  aggregatorsFixedSize += model.lengthForObjectArrayOfSize(aggregators.length);
  for (int index = 0; index < aggregators.length; ++index) {
    VectorAggregateExpression aggregator = aggregators[index];
    aggregatorsFixedSize += aggregator.getAggregationBufferFixedSize();
    if (aggregator.hasVariableSize()) {
      variableIndexes[variableCount++] = index;
    }
  }
  this.variableSizeAggregators =
      Arrays.copyOfRange(variableIndexes, 0, variableCount);
}
@Override
public String toString() {
  // Renders as "ClassName(inputExpr) -> outputType", or "ClassName(*) -> ..."
  // when there is no input expression (e.g. count(*)).
  StringBuilder sb = new StringBuilder();
  sb.append(this.getClass().getSimpleName());
  VectorExpression inputExpression = inputExpression();
  if (inputExpression == null) {
    sb.append("(*) -> ");
  } else {
    sb.append("(").append(inputExpression.toString()).append(") -> ");
  }
  ObjectInspector outputObjectInspector = getOutputObjectInspector();
  sb.append(outputObjectInspector.getTypeName());
  return sb.toString();
}
}
// NOTE(review): this span looks like an extraction artifact stitching together
// fragments from several different methods (tail of an exception message,
// transient init of the input expression, buffer allocation, an
// aggregateInputSelection call with arguments that appear to belong to another
// call site, and final output assignment). It is almost certainly not
// compilable as-is — verify against the original file before editing.
"(VectorAggregationDesc) object ", e); VectorExpression.doTransientInit(vecAggrExpr.getInputExpression()); if (bufferRow == null) { VectorAggregateExpression.AggregationBuffer aggregationBuffer = vecAggrExpr.getNewAggregationBuffer(); aggregationBuffer.reset(); VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = vecAggrExpr.aggregateInputSelection( batchBufferRows, 0, outputColumnNames, outputTypeInfos, new DataTypePhysicalVariation[] { vecAggrExpr.getOutputDataTypePhysicalVariation() }, VectorAggregateExpression.AggregationBuffer aggregationBuffer = vectorAggregationBufferRow.getAggregationBuffer(0); vecAggrExpr.assignRowColumn(outputBatch, outputBatch.size++, 0, aggregationBuffer);
/**
 * Processes one input batch for a global (keyless) aggregation: the whole
 * batch is folded into each aggregator's single running buffer.
 */
@Override
public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet,
    boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException {
  final int aggCount = aggregators.length;
  for (int index = 0; index < aggCount; ++index) {
    aggregators[index].aggregateInput(
        aggregationBuffers.getAggregationBuffer(index), batch);
  }
}
// Fragment (incomplete from this view): initializes aggregator i from its
// AggregationDesc and collects its output object inspector — presumably used
// to build the operator's output row object inspector; verify against the
// enclosing method.
aggregators[i].init(conf.getAggregators().get(i));
objectInspectors.add(aggregators[i].getOutputObjectInspector());
/** * Emits a (reduce) group row, made from the key (copied in at the beginning of the group) and * the row aggregation buffers values * @param agg * @param buffer * @throws HiveException */ private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buffer) throws HiveException { int fi = outputKeyLength; // Start after group keys. for (int i = 0; i < aggregators.length; ++i) { vectorAssignRow.assignRowColumn(outputBatch, outputBatch.size, fi++, aggregators[i].evaluateOutput(agg.getAggregationBuffer(i))); } ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { flushOutput(); buffer.reset(); } }
/**
 * Returns the object-inspector category (e.g. PRIMITIVE, STRUCT) of the
 * given vectorized aggregate expression's output type.
 */
public static ObjectInspector.Category aggregationOutputCategory(
    VectorAggregateExpression vectorAggrExpr) {
  return vectorAggrExpr.getOutputObjectInspector().getCategory();
}
/** * Emits a (reduce) group row, made from the key (copied in at the beginning of the group) and * the row aggregation buffers values * @param agg * @param buffer * @throws HiveException */ private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buffer) throws HiveException { int colNum = outputKeyLength; // Start after group keys. final int batchIndex = outputBatch.size; for (int i = 0; i < aggregators.length; ++i) { aggregators[i].assignRowColumn(outputBatch, batchIndex, colNum++, agg.getAggregationBuffer(i)); } ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { flushOutput(); buffer.reset(); } }
// Fragment (incomplete from this view): tail of an exception message for a
// failed aggregator construction, followed by transient initialization of the
// input expression and storage of the compiled aggregator — verify against
// the enclosing method in the original file.
"(VectorAggregationDesc) object ", e);
VectorExpression.doTransientInit(vecAggrExpr.getInputExpression());
aggregators[i] = vecAggrExpr;
/**
 * Processes one input batch for a global (keyless) aggregation: the whole
 * batch is folded into each aggregator's single running buffer.
 */
@Override
public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet,
    boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException {
  final int aggCount = aggregators.length;
  for (int index = 0; index < aggCount; ++index) {
    aggregators[index].aggregateInput(
        aggregationBuffers.getAggregationBuffer(index), batch);
  }
}
// Fragment (incomplete from this view): initializes aggregator i from its
// AggregationDesc and collects its output object inspector — presumably used
// to build the operator's output row object inspector; verify against the
// enclosing method.
aggregators[i].init(conf.getAggregators().get(i));
objectInspectors.add(aggregators[i].getOutputObjectInspector());
// Fragment (incomplete from this view): stores aggregator i's evaluated
// output into the forward cache row. NOTE(review): the second statement looks
// like an orphaned duplicate call with an unmatched ')' — almost certainly an
// extraction artifact; verify against the original file.
forwardCache[fi++] = aggregators[i].evaluateOutput(agg.getAggregationBuffer(i)); aggregators[i].evaluateOutput(agg.getAggregationBuffer(i)));
private boolean aggregatorsOutputIsPrimitive(AggregationDesc aggDesc, boolean isReduce) { VectorizationContext vc = new ValidatorVectorizationContext(); VectorAggregateExpression vectorAggrExpr; try { vectorAggrExpr = vc.getAggregatorExpression(aggDesc, isReduce); } catch (Exception e) { // We should have already attempted to vectorize in validateAggregationDesc. LOG.info("Vectorization of aggreation should have succeeded ", e); return false; } ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); if (outputObjInspector.getCategory() == ObjectInspector.Category.PRIMITIVE) { return true; } return false; }
/** * Emits a single row, made from the key and the row aggregation buffers values * kw is null if keyExpressions.length is 0 * @param kw * @param agg * @throws HiveException */ private void writeSingleRow(VectorHashKeyWrapperBase kw, VectorAggregationBufferRow agg) throws HiveException { int colNum = 0; final int batchIndex = outputBatch.size; // Output keys and aggregates into the output batch. for (int i = 0; i < outputKeyLength; ++i) { keyWrappersBatch.assignRowColumn(outputBatch, batchIndex, colNum++, kw); } for (int i = 0; i < aggregators.length; ++i) { aggregators[i].assignRowColumn(outputBatch, batchIndex, colNum++, agg.getAggregationBuffer(i)); } ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { flushOutput(); } }
/**
 * Allocates a new aggregation buffer set: one freshly reset buffer
 * per configured aggregator, wrapped in a {@link VectorAggregationBufferRow}.
 */
protected VectorAggregationBufferRow allocateAggregationBuffer() throws HiveException {
  final int count = aggregators.length;
  VectorAggregateExpression.AggregationBuffer[] buffers =
      new VectorAggregateExpression.AggregationBuffer[count];
  for (int index = 0; index < count; ++index) {
    VectorAggregateExpression.AggregationBuffer buf =
        aggregators[index].getNewAggregationBuffer();
    aggregators[index].reset(buf);
    buffers[index] = buf;
  }
  return new VectorAggregationBufferRow(buffers);
}
/** * Evaluates the aggregators on the current batch. * The aggregationBatchInfo must have been prepared * by calling {@link #prepareBatchAggregationBufferSets} first. */ protected void processAggregators(VectorizedRowBatch batch) throws HiveException { // We now have a vector of aggregation buffer sets to use for each row // We can start computing the aggregates. // If the number of distinct keys in the batch is 1 we can // use the optimized code path of aggregateInput VectorAggregationBufferRow[] aggregationBufferSets = aggregationBatchInfo.getAggregationBuffers(); if (aggregationBatchInfo.getDistinctBufferSetCount() == 1) { VectorAggregateExpression.AggregationBuffer[] aggregationBuffers = aggregationBufferSets[0].getAggregationBuffers(); for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(aggregationBuffers[i], batch); } } else { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInputSelection( aggregationBufferSets, i, batch); } } }
/**
 * Precomputes per-batch aggregation bookkeeping: the fixed memory footprint of
 * one aggregation buffer set, and the indexes of aggregators whose buffers
 * have a variable size (tracked separately for memory estimation).
 */
public void compileAggregationBatchInfo(VectorAggregateExpression[] aggregators) {
  JavaDataModel model = JavaDataModel.get();
  int[] variableIndexes = new int[aggregators.length];
  int variableCount = 0;
  // Fixed overhead of the buffer-set row itself: object header, two
  // primitives and one reference, aligned, plus the buffer array.
  aggregatorsFixedSize = JavaDataModel.alignUp(
      model.object() + model.primitive1() * 2 + model.ref(),
      model.memoryAlign());
  aggregatorsFixedSize += model.lengthForObjectArrayOfSize(aggregators.length);
  for (int index = 0; index < aggregators.length; ++index) {
    VectorAggregateExpression aggregator = aggregators[index];
    aggregatorsFixedSize += aggregator.getAggregationBufferFixedSize();
    if (aggregator.hasVariableSize()) {
      variableIndexes[variableCount++] = index;
    }
  }
  this.variableSizeAggregators =
      Arrays.copyOfRange(variableIndexes, 0, variableCount);
}
@Override public void doProcessBatch(VectorizedRowBatch batch, boolean isFirstGroupingSet, boolean[] currentGroupingSetsOverrideIsNulls) throws HiveException { assert(inGroup); if (first) { // Copy the group key to output batch now. We'll copy in the aggregates at the end of the group. first = false; groupKeyHelper.copyGroupKey(batch, outputBatch, buffer); } // Aggregate this batch. for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregateInput(groupAggregators.getAggregationBuffer(i), batch); } }