/**
 * Get the length of fixed width pivoted GROUP BY key column data
 * from a deserialized spilled batch.
 *
 * @return deserialized length for fixed width data.
 */
public int getReadableBytesForFixedWidthData() {
  return fixedKeyColPivotedData.readableBytes();
}
/**
 * Get the length of variable width pivoted GROUP BY key column data
 * from a deserialized spilled batch.
 *
 * @return deserialized length for variable width data.
 */
public int getReadableBytesForVariableWidthData() {
  return variableKeyColPivotedData.readableBytes();
}
public long getByteCount() {
  return body == null ? 0 : body.readableBytes();
}
private ArrowRecordBatch(boolean dummy, int length, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
  this.length = length;
  this.nodes = nodes;
  this.buffers = buffers;
  this.closed = false;
  List<ArrowBuffer> arrowBuffers = new ArrayList<>();
  long offset = 0;
  for (ArrowBuf arrowBuf : buffers) {
    long size = arrowBuf.readableBytes();
    arrowBuffers.add(new ArrowBuffer(offset, size));
    offset += size;
  }
  this.buffersLayout = Collections.unmodifiableList(arrowBuffers);
}
/**
 * Write an ArrowBuf to a stream.
 *
 * @param buffer buffer to write
 * @param output output stream for the spill file
 * @throws IOException if IO fails
 */
private void writeArrowBuf(final ArrowBuf buffer, final OutputStream output) throws IOException {
  final int bufferLength = buffer.readableBytes();
  for (int writePos = 0; writePos < bufferLength; writePos += ioBuffer.length) {
    final int lengthToWrite = Math.min(ioBuffer.length, bufferLength - writePos);
    buffer.getBytes(writePos, ioBuffer, 0, lengthToWrite);
    output.write(ioBuffer, 0, lengthToWrite);
  }
}
@Override
public String toString(Charset charset) {
  return toString(readerIndex, readableBytes(), charset);
}
/**
 * Write the contents of an ArrowBuf to a stream. Done this way, rather
 * than calling ArrowBuf.getBytes(), because copying through a reusable
 * intermediate buffer avoids a fresh heap allocation on every call.
 *
 * @param buf the ArrowBuf to write
 * @param output the output stream
 * @throws IOException if a write error occurs
 */
private void writeBuf(ArrowBuf buf, OutputStream output) throws IOException {
  int bufLength = buf.readableBytes();
  /* Use the current thread's buffer (safe to do since the I/O operation is blocking) */
  final byte[] tmpBuffer = REUSABLE_LARGE_BUFFER.get();
  for (int posn = 0; posn < bufLength; posn += tmpBuffer.length) {
    int len = Math.min(tmpBuffer.length, bufLength - posn);
    buf.getBytes(posn, tmpBuffer, 0, len);
    output.write(tmpBuffer, 0, len);
  }
}
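// A minimal sketch (assumption, not from the original source) of how the
// REUSABLE_LARGE_BUFFER referenced above could be declared: one byte array per
// thread, safe to reuse across calls because the write above is blocking.
private static final ThreadLocal<byte[]> REUSABLE_LARGE_BUFFER =
    ThreadLocal.withInitial(() -> new byte[64 * 1024]); // hypothetical 64 KB size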
@Override
public void eval() {
  buffer.clear();
  com.dremio.exec.util.ByteBufUtil.HadoopWritables.writeVLong(errorContext, buffer, 0, 9, in.value);
  out.buffer = buffer;
  out.start = 0;
  out.end = buffer.readableBytes();
}
/**
 * Compute the total size (in bytes) of the fixed and variable key blocks
 * across all batches of data inserted into the hash table.
 *
 * @return total size (in bytes) of fixed and variable blocks in the hash table.
 */
private long getKeyBlockSizeInBytes() {
  final int blocks = blocks();
  long totalFixedBlockSize = 0;
  long totalVariableBlockSize = 0;
  for (int i = 0; i < blocks; i++) {
    totalFixedBlockSize += fixedBlocks[i].getUnderlying().readableBytes();
    totalVariableBlockSize += variableBlocks[i].getUnderlying().readableBytes();
  }
  logger.debug("Hash table blocks: {}, total size of fixed blocks: {}, total size of var blocks: {}",
      blocks, totalFixedBlockSize, totalVariableBlockSize);
  return totalFixedBlockSize + totalVariableBlockSize;
}
@Override
public ByteBuffer nioBuffer() {
  return nioBuffer(readerIndex(), readableBytes());
}
/**
 * Construct a record batch from nodes.
 *
 * @param length how many rows in this batch
 * @param nodes field level info
 * @param buffers will be retained until this recordBatch is closed
 */
public ArrowRecordBatch(int length, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers, boolean alignBuffers) {
  super();
  this.length = length;
  this.nodes = nodes;
  this.buffers = buffers;
  List<ArrowBuffer> arrowBuffers = new ArrayList<>();
  long offset = 0;
  for (ArrowBuf arrowBuf : buffers) {
    arrowBuf.retain();
    long size = arrowBuf.readableBytes();
    arrowBuffers.add(new ArrowBuffer(offset, size));
    LOGGER.debug("Buffer in RecordBatch at {}, length: {}", offset, size);
    offset += size;
    if (alignBuffers && offset % 8 != 0) {
      // align on 8-byte boundaries
      offset += 8 - (offset % 8);
    }
  }
  this.buffersLayout = Collections.unmodifiableList(arrowBuffers);
}
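// Illustrative helper (not in the original class) capturing the same 8-byte
// alignment arithmetic used above: alignTo8(13) == 16, alignTo8(16) == 16.
private static long alignTo8(long offset) {
  return (offset % 8 == 0) ? offset : offset + (8 - offset % 8);
}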
@Override
public ArrowBuf slice() {
  return slice(readerIndex(), readableBytes());
}
/**
 * Get the number of records in a hash table batch. We only need to look at the
 * fixed block buffer to count records: the fixed block and variable block
 * buffers run in parallel, not in terms of length, but in terms of the rows
 * (pivoted records) they store.
 *
 * A row in the fixed block buffer is exactly one block width
 * (validity, all fixed columns, var offset), and on every insertion into the
 * hash table we bump the writer index in both blocks. So readableBytes() in the
 * fixed buffer divided by the block width gives the exact count of records
 * inserted into a particular hash table batch.
 *
 * @param batchIndex hash table batch/block/chunk index
 * @return number of records in the batch
 */
public int getRecordsInBatch(final int batchIndex) {
  Preconditions.checkArgument(batchIndex < blocks(), "Error: invalid batch index");
  final int records = fixedBlocks[batchIndex].getUnderlying().readableBytes() / pivot.getBlockWidth();
  Preconditions.checkArgument(records <= MAX_VALUES_PER_BATCH, "Error: detected invalid number of records in batch");
  return records;
}
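// Worked example (illustrative values, not from the source): if
// pivot.getBlockWidth() is 16 bytes and the fixed block buffer reports
// 4096 readable bytes, the batch holds 4096 / 16 = 256 pivoted records.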
/**
 * Computes the size of the serialized body for this recordBatch.
 */
@Override
public int computeBodyLength() {
  int size = 0;
  List<ArrowBuf> buffers = getBuffers();
  List<ArrowBuffer> buffersLayout = getBuffersLayout();
  if (buffers.size() != buffersLayout.size()) {
    throw new IllegalStateException("the layout does not match: " + buffers.size() + " != " + buffersLayout.size());
  }
  for (int i = 0; i < buffers.size(); i++) {
    ArrowBuf buffer = buffers.get(i);
    ArrowBuffer layout = buffersLayout.get(i);
    // skip ahead to this buffer's offset in the layout
    size += (layout.getOffset() - size);
    ByteBuffer nioBuffer = buffer.nioBuffer(buffer.readerIndex(), buffer.readableBytes());
    size += nioBuffer.remaining();
    // pad to the next 8-byte boundary
    if (size % 8 != 0) {
      size += 8 - (size % 8);
    }
  }
  return size;
}
public void write(ArrowBuf buffer) throws IOException {
  ByteBuffer nioBuffer = buffer.nioBuffer(buffer.readerIndex(), buffer.readableBytes());
  write(nioBuffer);
}
private void writeCompressedBuf(ArrowBuf buf, OutputStream output) throws IOException {
  final int rawLength = buf.readableBytes();
  for (int posn = 0; posn < rawLength; posn += RAW_CHUNK_SIZE_TO_COMPRESS) {
    // The original snippet is truncated here. A typical body (a sketch, not the
    // verified implementation) compresses the next chunk of up to
    // RAW_CHUNK_SIZE_TO_COMPRESS bytes and writes it to the stream:
    final int chunkLen = Math.min(RAW_CHUNK_SIZE_TO_COMPRESS, rawLength - posn);
    writeCompressedChunk(buf, posn, chunkLen, output); // hypothetical helper
  }
}
public long getBodySize() {
  if (batch.getBody() == null) {
    return 0;
  }
  assert batch.getBody().readableBytes() >= 0;
  return batch.getBody().getPossibleMemoryConsumed();
}
/**
 * Writes a batch (comprising one or more ArrowBufs) to the provided output stream.
 *
 * @param output output stream handle for a spill file
 * @param writableBatch batch to spill
 * @param batchDefinition batch metadata
 * @throws IOException if writing to the stream fails
 */
private void writeBatchToStreamHelper(final OutputStream output,
                                      final HashAggPartitionWritableBatch writableBatch,
                                      final HashAggPartitionBatchDefinition batchDefinition) throws IOException {
  /* write chunk metadata */
  writeBatchDefinition(batchDefinition, output);
  final ArrowBuf[] buffersToSpill = writableBatch.getBuffers();
  /* write chunk data */
  for (ArrowBuf buffer : buffersToSpill) {
    spilledDataSize += buffer.readableBytes();
    writeArrowBuf(buffer, output);
  }
  numBatchesSpilled++;
  numRecordsSpilled += batchDefinition.accumulatorBatchDef.getRecordCount();
}
/**
 * Swap the bytes in place to get BE byte order in a NullableDecimalVector.
 *
 * @param dataBuffer data buffer of the decimal vector
 */
static void patchDecimal(final ArrowBuf dataBuffer) {
  final int decimalLength = DecimalVector.TYPE_WIDTH;
  int startPoint = dataBuffer.readerIndex();
  final int valueCount = dataBuffer.readableBytes() / decimalLength;
  for (int i = 0; i < valueCount; i++) {
    // reverse the bytes of one decimal value by swapping from both ends
    for (int j = startPoint, k = startPoint + decimalLength - 1; j < k; j++, k--) {
      final byte firstByte = dataBuffer.getByte(j);
      final byte lastByte = dataBuffer.getByte(k);
      dataBuffer.setByte(j, lastByte);
      dataBuffer.setByte(k, firstByte);
    }
    startPoint += decimalLength;
  }
}
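// A minimal usage sketch (illustrative; assumes org.apache.arrow.memory.RootAllocator
// and org.apache.arrow.vector.DecimalVector). After populating a DecimalVector,
// its data buffer can be patched in place to big-endian byte order:
try (BufferAllocator allocator = new RootAllocator();
     DecimalVector vector = new DecimalVector("d", allocator, 38, 0)) {
  vector.allocateNew(2);
  vector.setSafe(0, 123L);
  vector.setSafe(1, -456L);
  vector.setValueCount(2);
  patchDecimal(vector.getDataBuffer()); // every 16-byte value is now byte-reversed
}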