/**
 * Factory for a CapacityByteArrayOutputStream whose initial slab size is chosen by
 * {@link #initialSlabSizeHeuristic}, using maxCapacityHint as the target capacity.
 *
 * @param minSlabSize a minimum slab size
 * @param maxCapacityHint a hint for the maximum required capacity
 * @param targetNumSlabs the target number of slabs
 * @param allocator an allocator to use when creating byte buffers for slabs
 * @return a capacity baos
 */
public static CapacityByteArrayOutputStream withTargetNumSlabs(
    int minSlabSize, int maxCapacityHint, int targetNumSlabs, ByteBufferAllocator allocator) {
  // Work out the starting slab size first so the constructor call stays readable.
  final int initialSlabSize =
      initialSlabSizeHeuristic(minSlabSize, maxCapacityHint, targetNumSlabs);
  return new CapacityByteArrayOutputStream(initialSlabSize, maxCapacityHint, allocator);
}
/**
 * {@inheritDoc}
 *
 * <p>Reports the capacity of the underlying {@code out} stream.
 *
 * @see org.apache.parquet.column.values.ValuesWriter#getAllocatedSize()
 */
@Override
public long getAllocatedSize() {
  final long allocated = out.getCapacity();
  return allocated;
}
/**
 * {@inheritDoc}
 *
 * <p>Reports the size of the underlying {@code out} stream.
 *
 * @see org.apache.parquet.column.values.ValuesWriter#getBufferedSize()
 */
@Override
public long getBufferedSize() {
  final long buffered = out.size();
  return buffered;
}
public FixedLenByteArrayPlainValuesWriter(int length, int initialSize, int pageSize, ByteBufferAllocator allocator) { this.length = length; this.allocator = allocator; this.arrayOut = new CapacityByteArrayOutputStream(initialSize, pageSize, this.allocator); this.out = new LittleEndianDataOutputStream(arrayOut); }
/**
 * Zeroes the total value count, resets {@code baos}, and zeroes the count of delta values
 * waiting to be flushed.
 */
@Override
public void reset() {
  totalValueCount = 0;
  baos.reset();
  deltaValuesToFlush = 0;
}
private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPageSize, boolean enableDict, int minRowCountForPageSizeCheck, int maxRowCountForPageSizeCheck, boolean estimateNextSizeCheck, ByteBufferAllocator allocator, ValuesWriterFactory writerFactory) { this.pageSizeThreshold = pageSize; this.initialSlabSize = CapacityByteArrayOutputStream .initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSizeThreshold, 10); this.dictionaryPageSizeThreshold = dictPageSize; this.writerVersion = writerVersion; this.enableDictionary = enableDict; this.minRowCountForPageSizeCheck = minRowCountForPageSizeCheck; this.maxRowCountForPageSizeCheck = maxRowCountForPageSizeCheck; this.estimateNextSizeCheck = estimateNextSizeCheck; this.allocator = allocator; this.valuesWriterFactory = writerFactory; }
/**
 * Zeroes the total value count, closes {@code baos}, and zeroes the count of delta values
 * waiting to be flushed.
 */
@Override
public void close() {
  totalValueCount = 0;
  baos.close();
  deltaValuesToFlush = 0;
}
/**
 * Describes memory usage by delegating to {@code arrayOut}, with " PLAIN" appended to the
 * caller's prefix.
 *
 * @param prefix text placed in front of the " PLAIN" tag
 * @return whatever {@code arrayOut.memUsageString} returns for the tagged prefix
 */
@Override
public String memUsageString(String prefix) {
  final String taggedPrefix = prefix + " PLAIN";
  return arrayOut.memUsageString(taggedPrefix);
}
}
private void writeOrAppendBitPackedRun() throws IOException { if (bitPackedGroupCount >= 63) { // we've packed as many values as we can for this run, // end it and start a new one endPreviousBitPackedRun(); } if (bitPackedRunHeaderPointer == -1) { // this is a new bit-packed-run, allocate a byte for the header // and keep a "pointer" to it so that it can be mutated later baos.write(0); // write a sentinel value bitPackedRunHeaderPointer = baos.getCurrentIndex(); } packer.pack8Values(bufferedValues, 0, packBuffer, 0); baos.write(packBuffer); // empty the buffer, they've all been written numBufferedValues = 0; // clear the repeat count, as some repeated values // may have just been bit packed into this run repeatCount = 0; ++bitPackedGroupCount; }
/**
 * Appends one byte to the current slab, adding a new slab first when the current one has
 * no room left.
 *
 * @param b the value to write; it is narrowed to a {@code byte}
 */
@Override
public void write(int b) {
  if (!currentSlab.hasRemaining()) {
    addSlab(1);
  }
  // Read the offset after any slab switch, since addSlab may have changed it.
  final int writeAt = currentSlabIndex;
  currentSlab.put(writeAt, (byte) b);
  currentSlabIndex = writeAt + 1;
  // Keep the buffer's own position in step with the tracked index.
  currentSlab.position(currentSlabIndex);
  bytesUsed++;
}
/** * If we are currently writing a bit-packed-run, update the * bit-packed-header and consider this run to be over * * does nothing if we're not currently writing a bit-packed run */ private void endPreviousBitPackedRun() { if (bitPackedRunHeaderPointer == -1) { // we're not currently in a bit-packed-run return; } // create bit-packed-header, which needs to fit in 1 byte byte bitPackHeader = (byte) ((bitPackedGroupCount << 1) | 1); // update this byte baos.setByte(bitPackedRunHeaderPointer, bitPackHeader); // mark that this run is over bitPackedRunHeaderPointer = -1; // reset the number of groups bitPackedGroupCount = 0; }
public DeltaBinaryPackingValuesWriter(int blockSizeInValues, int miniBlockNum, int slabSize, int pageSize, ByteBufferAllocator allocator) { this.config = new DeltaBinaryPackingConfig(blockSizeInValues, miniBlockNum); bitWidths = new int[config.miniBlockNumInABlock]; baos = new CapacityByteArrayOutputStream(slabSize, pageSize, allocator); }
/**
 * Returns the encoder's bookkeeping fields to their starting values.
 *
 * @param resetBaos when {@code true}, {@code baos} is reset too; when {@code false} it is
 *     left untouched
 */
private void reset(boolean resetBaos) {
  if (resetBaos) {
    baos.reset();
  }
  toBytesCalled = false;
  // -1 marks that no bit-packed run is in progress.
  bitPackedRunHeaderPointer = -1;
  bitPackedGroupCount = 0;
  repeatCount = 0;
  numBufferedValues = 0;
  previousValue = 0;
}
private ParquetProperties(WriterVersion writerVersion, int pageSize, int dictPageSize, boolean enableDict, int minRowCountForPageSizeCheck, int maxRowCountForPageSizeCheck, boolean estimateNextSizeCheck, ByteBufferAllocator allocator, ValuesWriterFactory writerFactory, int columnIndexMinMaxTruncateLength, int pageRowCountLimit) { this.pageSizeThreshold = pageSize; this.initialSlabSize = CapacityByteArrayOutputStream .initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSizeThreshold, 10); this.dictionaryPageSizeThreshold = dictPageSize; this.writerVersion = writerVersion; this.enableDictionary = enableDict; this.minRowCountForPageSizeCheck = minRowCountForPageSizeCheck; this.maxRowCountForPageSizeCheck = maxRowCountForPageSizeCheck; this.estimateNextSizeCheck = estimateNextSizeCheck; this.allocator = allocator; this.valuesWriterFactory = writerFactory; this.columnIndexTruncateLength = columnIndexMinMaxTruncateLength; this.pageRowCountLimit = pageRowCountLimit; }
/** Closes this object by delegating to {@code buf.close()}; nothing else is done here. */
@Override public void close() { buf.close(); } }
/**
 * Describes memory usage by delegating to {@code out} with the prefix passed through
 * unchanged.
 *
 * @param prefix text forwarded to {@code out.memUsageString}
 * @return whatever {@code out.memUsageString} returns for that prefix
 */
@Override
public String memUsageString(String prefix) {
  final String usage = out.memUsageString(prefix);
  return usage;
}
private void writeOrAppendBitPackedRun() throws IOException { if (bitPackedGroupCount >= 63) { // we've packed as many values as we can for this run, // end it and start a new one endPreviousBitPackedRun(); } if (bitPackedRunHeaderPointer == -1) { // this is a new bit-packed-run, allocate a byte for the header // and keep a "pointer" to it so that it can be mutated later baos.write(0); // write a sentinel value bitPackedRunHeaderPointer = baos.getCurrentIndex(); } packer.pack8Values(bufferedValues, 0, packBuffer, 0); baos.write(packBuffer); // empty the buffer, they've all been written numBufferedValues = 0; // clear the repeat count, as some repeated values // may have just been bit packed into this run repeatCount = 0; ++bitPackedGroupCount; }
/**
 * Appends one byte to the current slab, adding a new slab first when the current one has
 * no room left.
 *
 * @param b the value to write; it is narrowed to a {@code byte}
 */
@Override
public void write(int b) {
  if (!currentSlab.hasRemaining()) {
    addSlab(1);
  }
  // Read the offset after any slab switch, since addSlab may have changed it.
  final int writeAt = currentSlabIndex;
  currentSlab.put(writeAt, (byte) b);
  currentSlabIndex = writeAt + 1;
  // Keep the buffer's own position in step with the tracked index.
  currentSlab.position(currentSlabIndex);
  bytesUsed++;
}
/** * If we are currently writing a bit-packed-run, update the * bit-packed-header and consider this run to be over * * does nothing if we're not currently writing a bit-packed run */ private void endPreviousBitPackedRun() { if (bitPackedRunHeaderPointer == -1) { // we're not currently in a bit-packed-run return; } // create bit-packed-header, which needs to fit in 1 byte byte bitPackHeader = (byte) ((bitPackedGroupCount << 1) | 1); // update this byte baos.setByte(bitPackedRunHeaderPointer, bitPackHeader); // mark that this run is over bitPackedRunHeaderPointer = -1; // reset the number of groups bitPackedGroupCount = 0; }
/**
 * {@inheritDoc}
 *
 * <p>Reports the size of the underlying {@code out} stream.
 *
 * @see org.apache.parquet.column.values.ValuesWriter#getBufferedSize()
 */
@Override
public long getBufferedSize() {
  final long buffered = out.size();
  return buffered;
}