/**
 * Creates a bit writer backed by a growable byte buffer.
 *
 * @param initialCapacity initial size, in bytes, of the backing buffer
 * @param pageSize page size used by the backing buffer when it grows
 */
public BitWriter(int initialCapacity, int pageSize) {
  this.baos = new CapacityByteArrayOutputStream(initialCapacity, pageSize);
}
/**
 * Returns the number of bytes currently allocated by the backing buffer
 * (its capacity, which may exceed the number of bytes actually written).
 *
 * @return allocated size in bytes
 */
public long getAllocatedSize() {
  return baos.getCapacity();
}
}
/**
 * {@inheritDoc}
 *
 * Reports how many bytes are currently buffered in {@code out}.
 *
 * @see parquet.column.primitive.PrimitiveColumnWriter#getBufferedSize()
 */
@Override
public long getBufferedSize() {
  return out.size();
}
/**
 * Static factory: builds a {@link CapacityByteArrayOutputStream} whose first
 * slab size is chosen by {@link #initialSlabSizeHeuristic} so that roughly
 * {@code targetNumSlabs} slabs are needed to reach {@code maxCapacityHint}
 * (i.e. targetCapacity == maxCapacityHint).
 *
 * @param minSlabSize lower bound for the computed initial slab size
 * @param maxCapacityHint expected final capacity of the stream
 * @param targetNumSlabs desired number of slabs to reach that capacity
 * @return a stream configured with the heuristic initial slab size
 */
public static CapacityByteArrayOutputStream withTargetNumSlabs(
    int minSlabSize, int maxCapacityHint, int targetNumSlabs) {
  int initialSlabSize = initialSlabSizeHeuristic(minSlabSize, maxCapacityHint, targetNumSlabs);
  return new CapacityByteArrayOutputStream(initialSlabSize, maxCapacityHint);
}
/**
 * Clears all run-length/bit-packing encoder state so a new encoding pass can
 * begin.
 *
 * @param resetBaos when true, the underlying byte buffer is cleared as well;
 *                  when false, already-written bytes are kept
 */
private void reset(boolean resetBaos) {
  if (resetBaos) {
    baos.reset();
  }
  previousValue = 0;
  numBufferedValues = 0;
  repeatCount = 0;
  bitPackedGroupCount = 0;
  // -1 marks "no bit-packed run currently open"
  bitPackedRunHeaderPointer = -1;
  toBytesCalled = false;
}
/**
 * Appends one bit to the stream. Bits accumulate in {@code currentByte};
 * once eight have been collected the byte is flushed to the buffer and the
 * accumulator is cleared.
 *
 * @param bit the bit to append (true = 1, false = 0)
 */
public void writeBit(boolean bit) {
  if (DEBUG) {
    LOG.debug("writing: " + (bit ? "1" : "0"));
  }
  currentByte = setBytePosition(currentByte, currentBytePosition++, bit);
  if (currentBytePosition != 8) {
    return; // byte not yet full; keep accumulating
  }
  baos.write(currentByte);
  if (DEBUG) {
    LOG.debug("to buffer: " + toBinary(currentByte));
  }
  currentByte = 0;
  currentBytePosition = 0;
}
/**
 * Describes this writer's memory usage by delegating to the underlying
 * buffer, tagging the report with "PLAIN".
 *
 * @param prefix indentation/label prefix for the report line
 * @return a human-readable memory usage string
 */
@Override
public String memUsageString(String prefix) {
  return arrayOut.memUsageString(prefix + " PLAIN");
}
}
/**
 * Flushes any partially-filled byte and returns the accumulated bytes.
 * Safe to call more than once: the partial byte is only flushed the first
 * time; later calls just snapshot the buffer again.
 *
 * @return a copy of everything written so far
 */
public byte[] finish() {
  if (!finished && currentBytePosition > 0) {
    // pad out and flush the in-progress byte exactly once
    baos.write(currentByte);
    if (DEBUG) {
      LOG.debug("to buffer: " + toBinary(currentByte));
    }
  }
  byte[] result = baos.toByteArray();
  finished = true;
  return result;
}
/**
 * Writes a single byte, growing the buffer by one slab when the current
 * slab is full.
 *
 * @param b the byte to write (low 8 bits are used)
 */
@Override
public void write(int b) {
  if (currentSlab.length == currentSlabIndex) {
    addSlab(1);
  }
  currentSlab[currentSlabIndex++] = (byte) b;
  bytesUsed++;
}
/**
 * Packs the 8 currently buffered values into the open bit-packed run,
 * starting a new run if none is open or the current one is full.
 *
 * A run's header byte is written first as a sentinel (0) and its position
 * remembered in {@code bitPackedRunHeaderPointer} so it can be patched with
 * the final group count when the run ends (presumably in
 * endPreviousBitPackedRun — not visible here; confirm against that method).
 *
 * @throws IOException if writing the packed bytes to the buffer fails
 */
private void writeOrAppendBitPackedRun() throws IOException {
  if (bitPackedGroupCount >= 63) {
    // we've packed as many values as we can for this run,
    // end it and start a new one
    endPreviousBitPackedRun();
  }
  if (bitPackedRunHeaderPointer == -1) {
    // this is a new bit-packed-run, allocate a byte for the header
    // and keep a "pointer" to it so that it can be mutated later
    baos.write(0); // write a sentinel value
    bitPackedRunHeaderPointer = baos.getCurrentIndex();
  }
  // pack the 8 buffered values into packBuffer, then append to the stream
  packer.pack8Values(bufferedValues, 0, packBuffer, 0);
  baos.write(packBuffer);
  // empty the buffer, they've all been written
  numBufferedValues = 0;
  // clear the repeat count, as some repeated values
  // may have just been bit packed into this run
  repeatCount = 0;
  ++bitPackedGroupCount;
}
/**
 * Creates a v2 column writer for the given column path.
 *
 * Repetition and definition levels are encoded with the RLE/bit-packing
 * hybrid encoder sized to the column's max levels; the data column writer
 * is supplied by the Parquet properties.
 *
 * @param path descriptor of the column being written
 * @param pageWriter sink for completed pages
 * @param parquetProps writer configuration (provides the values writer)
 * @param pageSize target page size in bytes
 */
public ColumnWriterV2(
    ColumnDescriptor path,
    PageWriter pageWriter,
    ParquetProperties parquetProps,
    int pageSize) {
  this.path = path;
  this.pageWriter = pageWriter;
  resetStatistics();
  int repetitionWidth = getWidthFromMaxInt(path.getMaxRepetitionLevel());
  int definitionWidth = getWidthFromMaxInt(path.getMaxDefinitionLevel());
  this.repetitionLevelColumn =
      new RunLengthBitPackingHybridEncoder(repetitionWidth, MIN_SLAB_SIZE, pageSize);
  this.definitionLevelColumn =
      new RunLengthBitPackingHybridEncoder(definitionWidth, MIN_SLAB_SIZE, pageSize);
  // size the first slab so ~10 slabs reach the page size
  int initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSize, 10);
  this.dataColumn = parquetProps.getValuesWriter(path, initialSlabSize, pageSize);
}
/**
 * Copies everything buffered so far to the given output stream.
 *
 * @param out destination stream
 * @throws IOException if the underlying write fails
 */
@Override
public void writeAllTo(OutputStream out) throws IOException {
  arrayOut.writeTo(out);
}
/**
 * Discards all written bits and returns the writer to its initial state
 * so it can be reused for a new stream.
 */
public void reset() {
  baos.reset();
  currentByte = 0;
  currentBytePosition = 0;
  finished = false;
}
/**
 * Static factory: builds a {@link CapacityByteArrayOutputStream} whose first
 * slab size is computed by {@link #initialSlabSizeHeuristic} so that about
 * {@code targetNumSlabs} slabs cover {@code maxCapacityHint} bytes
 * (targetCapacity == maxCapacityHint).
 *
 * @param minSlabSize lower bound for the computed initial slab size
 * @param maxCapacityHint expected final capacity of the stream
 * @param targetNumSlabs desired number of slabs to reach that capacity
 * @return a stream configured with the heuristic initial slab size
 */
public static CapacityByteArrayOutputStream withTargetNumSlabs(
    int minSlabSize, int maxCapacityHint, int targetNumSlabs) {
  int firstSlabSize = initialSlabSizeHeuristic(minSlabSize, maxCapacityHint, targetNumSlabs);
  return new CapacityByteArrayOutputStream(firstSlabSize, maxCapacityHint);
}
/**
 * Appends a single bit. The bit is stored into the in-progress byte; when
 * that byte is complete (8 bits) it is flushed to the backing buffer and
 * the accumulator resets.
 *
 * @param bit the bit to append (true = 1, false = 0)
 */
public void writeBit(boolean bit) {
  if (DEBUG) {
    LOG.debug("writing: " + (bit ? "1" : "0"));
  }
  currentByte = setBytePosition(currentByte, currentBytePosition++, bit);
  if (currentBytePosition != 8) {
    return; // still filling the current byte
  }
  baos.write(currentByte);
  if (DEBUG) {
    LOG.debug("to buffer: " + toBinary(currentByte));
  }
  currentByte = 0;
  currentBytePosition = 0;
}
/**
 * Reports this page writer's memory usage via the underlying buffer,
 * labeled "ColumnChunkPageWriter".
 *
 * @param prefix indentation/label prefix for the report line
 * @return a human-readable memory usage string
 */
@Override
public String memUsageString(String prefix) {
  return buf.memUsageString(prefix + " ColumnChunkPageWriter");
}
}
/**
 * Appends one byte to the stream, allocating a fresh slab first if the
 * current slab has no room left.
 *
 * @param b the byte to write (low 8 bits are used)
 */
@Override
public void write(int b) {
  if (currentSlab.length == currentSlabIndex) {
    addSlab(1);
  }
  currentSlab[currentSlabIndex++] = (byte) b;
  bytesUsed++;
}
public ColumnWriterV1( ColumnDescriptor path, PageWriter pageWriter, int pageSizeThreshold, int dictionaryPageSizeThreshold, boolean enableDictionary, WriterVersion writerVersion) { this.path = path; this.pageWriter = pageWriter; this.pageSizeThreshold = pageSizeThreshold; // initial check of memory usage. So that we have enough data to make an initial prediction this.valueCountForNextSizeCheck = INITIAL_COUNT_FOR_SIZE_CHECK; resetStatistics(); ParquetProperties parquetProps = new ParquetProperties(dictionaryPageSizeThreshold, writerVersion, enableDictionary); this.repetitionLevelColumn = ParquetProperties.getColumnDescriptorValuesWriter(path.getMaxRepetitionLevel(), MIN_SLAB_SIZE, pageSizeThreshold); this.definitionLevelColumn = ParquetProperties.getColumnDescriptorValuesWriter(path.getMaxDefinitionLevel(), MIN_SLAB_SIZE, pageSizeThreshold); int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSizeThreshold, 10); this.dataColumn = parquetProps.getValuesWriter(path, initialSlabSize, pageSizeThreshold); }
/**
 * Streams all buffered bytes into the supplied output stream.
 *
 * @param out destination stream
 * @throws IOException if the underlying write fails
 */
@Override
public void writeAllTo(OutputStream out) throws IOException {
  arrayOut.writeTo(out);
}
/**
 * Returns the number of bytes currently buffered in {@code arrayOut}.
 *
 * @return buffered size in bytes
 */
@Override
public long getBufferedSize() {
  return arrayOut.size();
}