/**
 * Builds a reader for repetition/definition levels encoded with the RLE/bit-packing hybrid.
 *
 * @param maxLevel the maximum level value for this column; 0 means no levels were encoded
 * @param slice the raw encoded level bytes
 * @return a no-op reader when {@code maxLevel} is 0, otherwise an RLE-backed level reader
 */
private LevelReader buildLevelRLEReader(int maxLevel, Slice slice)
{
    // A max level of 0 means every value has the same implicit level: nothing was written.
    if (maxLevel == 0) {
        return new LevelNullReader();
    }
    int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    ByteArrayInputStream input = new ByteArrayInputStream(slice.getBytes());
    return new LevelRLEReader(new RunLengthBitPackingHybridDecoder(bitWidth, input));
}
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType)
{
    // Number of bits needed to represent the largest level value for this column/type.
    int bitWidth = BytesUtils.getWidthFromMaxInt(getMaxLevel(descriptor, valuesType));
    if (bitWidth == 0) {
        // Zero bits per value: every level is 0, so no decoding work is required.
        return new ZeroIntegerValuesReader();
    }
    return new RunLengthBitPackingHybridValuesReader(bitWidth);
}
},
/**
 * Creates a bit-packing reader sized for the given value range.
 *
 * @param bound the maximum value stored by this column
 */
public BitPackingValuesReader(int bound)
{
    // Width (in bits) required to represent any value in [0, bound].
    this.bitsPerValue = getWidthFromMaxInt(bound);
}
/**
 * Creates a bit-packing column reader sized for the given value range.
 *
 * @param bound the maximum value stored by this column
 */
public BitPackingColumnReader(int bound)
{
    // Width (in bits) required to represent any value in [0, bound].
    this.bitsPerValue = getWidthFromMaxInt(bound);
}
/**
 * Creates a byte-aligned bit-packing writer.
 *
 * @param bound the maximum value that will be written; determines the bit width
 * @param packer the packing implementation (byte order) to use
 */
public ByteBitPackingValuesWriter(int bound, Packer packer)
{
    this.bitWidth = BytesUtils.getWidthFromMaxInt(bound);
    this.packer = packer;
    this.encoder = new ByteBasedBitPackingEncoder(bitWidth, packer);
}
/**
 * Creates a reader for bounded integers.
 *
 * @param bound the maximum value stored by this column; must be positive
 * @throws ParquetDecodingException if {@code bound} is 0 (nothing to decode)
 */
public BoundedIntValuesReader(int bound)
{
    if (bound == 0) {
        throw new ParquetDecodingException("Value bound cannot be 0. Use DevNullColumnReader instead.");
    }
    bitsPerValue = BytesUtils.getWidthFromMaxInt(bound);
}
/**
 * Creates a column reader for bounded integers.
 *
 * @param bound the maximum value stored by this column; must be positive
 * @throws ParquetDecodingException if {@code bound} is 0 (nothing to decode)
 */
public BoundedIntColumnReader(int bound)
{
    if (bound == 0) {
        throw new ParquetDecodingException("Value bound cannot be 0. Use DevNullColumnReader instead.");
    }
    bitsPerValue = BytesUtils.getWidthFromMaxInt(bound);
}
/**
 * Creates a byte-aligned bit-packing reader.
 *
 * @param bound the maximum value stored by this column; determines the bit width
 * @param packer factory for the byte packer matching the encoded byte order
 */
public ByteBitPackingValuesReader(int bound, Packer packer)
{
    // bitWidth must be computed first: the packer below is sized from it.
    this.bitWidth = BytesUtils.getWidthFromMaxInt(bound);
    this.packer = packer.newBytePacker(bitWidth);
}
/**
 * Creates a writer for repetition/definition levels.
 *
 * @param maxLevel the maximum level value; 0 means the levels carry no information
 * @param initialSizePerCol initial buffer capacity for the column
 * @param pageSize maximum page size in bytes
 * @return a writer that discards everything when {@code maxLevel} is 0,
 *         otherwise an RLE/bit-packing hybrid writer
 */
public static ValuesWriter getColumnDescriptorValuesWriter(int maxLevel, int initialSizePerCol, int pageSize)
{
    // With a max level of 0 every level is 0: there is nothing to encode.
    if (maxLevel == 0) {
        return new DevNullValuesWriter();
    }
    int bitWidth = getWidthFromMaxInt(maxLevel);
    return new RunLengthBitPackingHybridValuesWriter(bitWidth, initialSizePerCol, pageSize);
}
/**
 * Creates a bit-packing writer sized for the given value range.
 *
 * @param bound the maximum value stored by this column; determines the bit width
 * @param initialCapacity initial capacity of the output buffer
 * @param pageSize maximum page size in bytes (upper bound for buffer growth)
 */
public BitPackingValuesWriter(int bound, int initialCapacity, int pageSize)
{
    this.bitsPerValue = getWidthFromMaxInt(bound);
    this.out = new CapacityByteArrayOutputStream(initialCapacity, pageSize);
    init();
}
/**
 * Creates a bit-packing column writer sized for the given value range.
 *
 * @param bound the maximum value stored by this column
 */
public BitPackingColumnWriter(int bound)
{
    this.bitsPerValue = getWidthFromMaxInt(bound);
    // 32 KiB initial buffer: likely more than needed, but anything smaller is tiny anyway.
    this.out = new CapacityByteArrayOutputStream(32 * 1024);
    init();
}
/**
 * Builds an iterator over RLE/bit-packing-hybrid encoded levels.
 *
 * @param maxLevel the maximum level value; 0 means all levels are implicitly 0
 * @param bytes the encoded level data
 * @return a null iterator when no levels were encoded, otherwise an RLE-backed iterator
 * @throws ParquetDecodingException if the level bytes cannot be read
 */
private IntIterator newRLEIterator(int maxLevel, BytesInput bytes)
{
    // A max level of 0 means the level stream is empty; nothing to decode.
    if (maxLevel == 0) {
        return new NullIntIterator();
    }
    try {
        int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
        return new RLEIntIterator(
            new RunLengthBitPackingHybridDecoder(bitWidth, new ByteArrayInputStream(bytes.toByteArray())));
    } catch (IOException e) {
        throw new ParquetDecodingException("could not read levels in page for col " + path, e);
    }
}
@Override
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType)
{
    int maxLevel = getMaxLevel(descriptor, valuesType);
    // Width (in bits) needed to represent the largest possible level value.
    int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    if (bitWidth == 0) {
        // All levels are 0: a constant-zero reader avoids decoding entirely.
        return new ZeroIntegerValuesReader();
    }
    return new RunLengthBitPackingHybridValuesReader(bitWidth);
}
},
/**
 * Builds a level reader over RLE/bit-packing-hybrid encoded bytes.
 *
 * @param maxLevel the maximum level value; 0 means no levels were encoded
 * @param bytes the encoded level data
 * @return a null reader when {@code maxLevel} is 0, otherwise an RLE-backed reader
 * @throws ParquetDecodingException if the level bytes cannot be read
 */
private ParquetLevelReader buildLevelRLEReader(int maxLevel, BytesInput bytes)
{
    // A max level of 0 means every value has the same implicit level: nothing was written.
    if (maxLevel == 0) {
        return new ParquetLevelNullReader();
    }
    try {
        // toByteArray() may perform I/O, so it stays inside the try block.
        ByteArrayInputStream input = new ByteArrayInputStream(bytes.toByteArray());
        int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
        return new ParquetLevelRLEReader(new RunLengthBitPackingHybridDecoder(bitWidth, input));
    } catch (IOException e) {
        throw new ParquetDecodingException("could not read levels in page for col " + columnDescriptor, e);
    }
}
/**
 * Creates a v2 column writer with RLE/bit-packing-hybrid encoded level columns.
 *
 * @param path the column descriptor (provides max repetition/definition levels)
 * @param pageWriter sink that receives completed pages
 * @param parquetProps properties used to pick the data values writer
 * @param pageSize maximum page size in bytes
 */
public ColumnWriterV2(
    ColumnDescriptor path,
    PageWriter pageWriter,
    ParquetProperties parquetProps,
    int pageSize) {
  this.path = path;
  this.pageWriter = pageWriter;
  resetStatistics();
  // Level encoders are sized by the bit width of the largest possible level value.
  int repetitionBitWidth = getWidthFromMaxInt(path.getMaxRepetitionLevel());
  int definitionBitWidth = getWidthFromMaxInt(path.getMaxDefinitionLevel());
  this.repetitionLevelColumn =
      new RunLengthBitPackingHybridEncoder(repetitionBitWidth, MIN_SLAB_SIZE, pageSize);
  this.definitionLevelColumn =
      new RunLengthBitPackingHybridEncoder(definitionBitWidth, MIN_SLAB_SIZE, pageSize);
  // Slab sizing heuristic assumes roughly 10 slabs to reach the page size.
  int initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_SLAB_SIZE, pageSize, 10);
  this.dataColumn = parquetProps.getValuesWriter(path, initialSlabSize, pageSize);
}
@Test
public void testWidth() {
  // {input, expected width} pairs covering every boundary up to 8 bits.
  int[][] cases = {
      {0, 0}, {1, 1}, {2, 2}, {3, 2},
      {4, 3}, {5, 3}, {6, 3}, {7, 3},
      {8, 4}, {15, 4}, {16, 5}, {31, 5},
      {32, 6}, {63, 6}, {64, 7}, {127, 7},
      {128, 8}, {255, 8},
  };
  for (int[] c : cases) {
    assertEquals(c[1], getWidthFromMaxInt(c[0]));
  }
}
}
// Serializes the dictionary-encoded value ids for the current page:
// a 1-byte bit-width header followed by the RLE/bit-packing-hybrid encoded ids.
// Also records the dictionary size at the time of this write so later code can
// tell whether the dictionary grew since the last page.
@Override
public BytesInput getBytes() {
  // Ids range over [0, dictionarySize - 1]; the largest id fixes the bit width.
  int maxDicId = getDictionarySize() - 1;
  if (DEBUG) LOG.debug("max dic id " + maxDicId);
  int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);
  // NOTE(review): heuristic assumes ~10 slabs to reach maxDictionaryByteSize — confirm intent.
  int initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);
  RunLengthBitPackingHybridEncoder encoder =
      new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize);
  IntIterator iterator = encodedValues.iterator();
  try {
    // Re-encode every buffered dictionary id with the hybrid encoder.
    while (iterator.hasNext()) {
      encoder.writeInt(iterator.next());
    }
    // encodes the bit width
    byte[] bytesHeader = new byte[] { (byte) bitWidth };
    BytesInput rleEncodedBytes = encoder.toBytes();
    if (DEBUG) LOG.debug("rle encoded bytes " + rleEncodedBytes.size());
    // Page payload = 1-byte bit-width header + encoded ids.
    BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
    // remember size of dictionary when we last wrote a page
    lastUsedDictionarySize = getDictionarySize();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    return bytes;
  } catch (IOException e) {
    throw new ParquetEncodingException("could not encode the values", e);
  }
}