/**
 * Creates a {@code CapacityByteArrayOutputStream} whose first slab size is chosen by
 * {@link #initialSlabSizeHeuristic}, using {@code maxCapacityHint} as the target capacity.
 *
 * @param minSlabSize lower bound on the initial slab size fed to the heuristic
 * @param maxCapacityHint expected total capacity; also passed through as the stream's capacity hint
 * @param targetNumSlabs desired number of slabs to reach the capacity hint
 * @return a stream whose initial slab size follows the heuristic
 */
public static CapacityByteArrayOutputStream withTargetNumSlabs(
    int minSlabSize, int maxCapacityHint, int targetNumSlabs) {
  final int initialSlab = initialSlabSizeHeuristic(minSlabSize, maxCapacityHint, targetNumSlabs);
  return new CapacityByteArrayOutputStream(initialSlab, maxCapacityHint);
}
/**
 * Factory that sizes the stream's initial slab via {@link #initialSlabSizeHeuristic},
 * treating {@code maxCapacityHint} as the target capacity.
 *
 * @param minSlabSize smallest allowed initial slab size
 * @param maxCapacityHint expected total capacity of the stream
 * @param targetNumSlabs how many slabs the heuristic should aim for
 * @return the configured stream
 */
public static CapacityByteArrayOutputStream withTargetNumSlabs(
    int minSlabSize, int maxCapacityHint, int targetNumSlabs) {
  return new CapacityByteArrayOutputStream(
      initialSlabSizeHeuristic(minSlabSize, maxCapacityHint, targetNumSlabs), maxCapacityHint);
}
/**
 * Creates a writer for one column producing v2 pages.
 *
 * @param path descriptor of the column being written (supplies max rep/def levels)
 * @param pageWriter sink that receives completed pages
 * @param parquetProps properties used to pick the values writer for the data column
 * @param pageSize page size threshold, also the capacity hint for the level encoders
 */
public ColumnWriterV2(
    ColumnDescriptor path,
    PageWriter pageWriter,
    ParquetProperties parquetProps,
    int pageSize) {
  this.path = path;
  this.pageWriter = pageWriter;
  resetStatistics();

  // Level streams are RLE/bit-packed; the bit width is derived from the max level value.
  final int repetitionWidth = getWidthFromMaxInt(path.getMaxRepetitionLevel());
  final int definitionWidth = getWidthFromMaxInt(path.getMaxDefinitionLevel());
  this.repetitionLevelColumn =
      new RunLengthBitPackingHybridEncoder(repetitionWidth, MIN_SLAB_SIZE, pageSize);
  this.definitionLevelColumn =
      new RunLengthBitPackingHybridEncoder(definitionWidth, MIN_SLAB_SIZE, pageSize);

  // Size the data column's first slab so roughly 10 slabs reach the page size.
  final int initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSize, 10);
  this.dataColumn = parquetProps.getValuesWriter(path, initialSlabSize, pageSize);
}
public ColumnWriterV1( ColumnDescriptor path, PageWriter pageWriter, int pageSizeThreshold, int dictionaryPageSizeThreshold, boolean enableDictionary, WriterVersion writerVersion) { this.path = path; this.pageWriter = pageWriter; this.pageSizeThreshold = pageSizeThreshold; // initial check of memory usage. So that we have enough data to make an initial prediction this.valueCountForNextSizeCheck = INITIAL_COUNT_FOR_SIZE_CHECK; resetStatistics(); ParquetProperties parquetProps = new ParquetProperties(dictionaryPageSizeThreshold, writerVersion, enableDictionary); this.repetitionLevelColumn = ParquetProperties.getColumnDescriptorValuesWriter(path.getMaxRepetitionLevel(), MIN_SLAB_SIZE, pageSizeThreshold); this.definitionLevelColumn = ParquetProperties.getColumnDescriptorValuesWriter(path.getMaxDefinitionLevel(), MIN_SLAB_SIZE, pageSizeThreshold); int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSizeThreshold, 10); this.dataColumn = parquetProps.getValuesWriter(path, initialSlabSize, pageSizeThreshold); }
@Override public BytesInput getBytes() { int maxDicId = getDictionarySize() - 1; if (DEBUG) LOG.debug("max dic id " + maxDicId); int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId); int initialSlabSize = CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10); RunLengthBitPackingHybridEncoder encoder = new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize); IntIterator iterator = encodedValues.iterator(); try { while (iterator.hasNext()) { encoder.writeInt(iterator.next()); } // encodes the bit width byte[] bytesHeader = new byte[] { (byte) bitWidth }; BytesInput rleEncodedBytes = encoder.toBytes(); if (DEBUG) LOG.debug("rle encoded bytes " + rleEncodedBytes.size()); BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes); // remember size of dictionary when we last wrote a page lastUsedDictionarySize = getDictionarySize(); lastUsedDictionaryByteSize = dictionaryByteSize; return bytes; } catch (IOException e) { throw new ParquetEncodingException("could not encode the values", e); } }