/**
 * Builds the iterator used to walk the repetition or definition levels of a page.
 *
 * <p>When {@code maxLevel} is 0 a {@code NullIntIterator} is returned and {@code bytes}
 * is never touched. Otherwise the level bytes are copied to a byte array and decoded by a
 * {@code RunLengthBitPackingHybridDecoder} whose bit width is derived from {@code maxLevel}.
 *
 * @param maxLevel the maximum level for the column
 * @param bytes the encoded levels
 * @return an iterator over the levels
 * @throws ParquetDecodingException if an {@link IOException} occurs while setting up the decoder
 */
private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    final int levelBitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    final ByteArrayInputStream levelStream = new ByteArrayInputStream(bytes.toByteArray());
    final RunLengthBitPackingHybridDecoder decoder =
        new RunLengthBitPackingHybridDecoder(levelBitWidth, levelStream);
    return new RLEIntIterator(decoder);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read levels in page for col " + descriptor, e);
  }
}
/**
 * Prepares this reader to consume a V1 data page.
 *
 * <p>The repetition-level reader, the definition-level reader and the data reader are
 * initialized, in that order, from the single input stream obtained from the page bytes.
 *
 * @param page the V1 page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while initializing the readers
 */
private void readPageV1(DataPageV1 page) {
  final ValuesReader repetitionReader =
      page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  final ValuesReader definitionReader =
      page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(repetitionReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(definitionReader);

  try {
    final BytesInput pageBytes = page.getBytes();
    LOG.debug("page size " + pageBytes.size() + " bytes and " + pageValueCount + " records");

    final ByteBufferInputStream stream = pageBytes.toInputStream();

    // Repetition levels come first in the page.
    LOG.debug("reading repetition levels at " + stream.position());
    repetitionReader.initFromPage(pageValueCount, stream);

    // Definition levels are read from the same stream next.
    LOG.debug("reading definition levels at " + stream.position());
    definitionReader.initFromPage(pageValueCount, stream);

    // The same stream is then handed to the data reader.
    LOG.debug("reading data at " + stream.position());
    initDataReader(page.getValueEncoding(), stream, page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
/**
 * Creates the iterator for a column's definition or repetition levels.
 *
 * <p>A {@code NullIntIterator} is returned when {@code maxLevel} is 0, in which case
 * {@code bytes} is not read. Otherwise the levels are decoded from
 * {@code bytes.toInputStream()} by a {@code RunLengthBitPackingHybridDecoder} whose bit
 * width is derived from {@code maxLevel}.
 *
 * @param maxLevel the maximum level for the column
 * @param bytes the encoded levels
 * @param descriptor the column being read; only used in the error message
 * @return an iterator over the levels
 * @throws IOException if the levels cannot be read; the original exception is kept as the cause
 */
protected static IntIterator createRLEIterator(
    int maxLevel, BytesInput bytes, ColumnDescriptor descriptor) throws IOException {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    final int levelBitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    final RunLengthBitPackingHybridDecoder decoder =
        new RunLengthBitPackingHybridDecoder(levelBitWidth, bytes.toInputStream());
    return new RLEIntIterator(decoder);
  } catch (IOException e) {
    throw new IOException("could not read levels in page for col " + descriptor, e);
  }
}
@Override public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { this.in = in; if (fixedWidth) { // initialize for repetition and definition levels if (readLength) { int length = readIntLittleEndian(); this.in = in.sliceStream(length); } } else { // initialize for values if (in.available() > 0) { init(in.read()); } } if (bitWidth == 0) { // 0 bit width, treat this as an RLE run of valueCount number of 0's. this.mode = MODE.RLE; this.currentCount = valueCount; this.currentValue = 0; } else { this.currentCount = 0; } }
/**
 * Prepares this reader to consume a V2 data page.
 *
 * <p>The page value count is recorded, level iterators are created from the page's
 * separate repetition-level and definition-level bytes, and the data reader is then
 * initialized from the page data.
 *
 * @param page the V2 page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while initializing the data reader
 */
private void readPageV2(DataPageV2 page) {
  this.pageValueCount = page.getValueCount();

  final int maxRepetitionLevel = descriptor.getMaxRepetitionLevel();
  this.repetitionLevelColumn = newRLEIterator(maxRepetitionLevel, page.getRepetitionLevels());

  final int maxDefinitionLevel = descriptor.getMaxDefinitionLevel();
  this.definitionLevelColumn = newRLEIterator(maxDefinitionLevel, page.getDefinitionLevels());

  try {
    LOG.debug(
        "page data size " + page.getData().size() + " bytes and " + pageValueCount + " records");
    initDataReader(
        page.getDataEncoding(),
        page.getData().toInputStream(),
        page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
/**
 * Sets up the decoding state for ints that are {@code bitWidth} bits wide.
 *
 * <p>Stores the bit width, the padded byte count for that width, and a little-endian
 * byte packer for it.
 *
 * @param bitWidth the width of each value in bits; must be between 0 and 32 inclusive
 */
private void init(int bitWidth) {
  final boolean widthInRange = bitWidth >= 0 && bitWidth <= 32;
  Preconditions.checkArgument(widthInRange, "bitWidth must be >= 0 and <= 32");

  this.bitWidth = bitWidth;
  this.bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth);
  this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
}
- ParquetFooterInputFromCache.FOOTER_LENGTH_SIZE - ParquetFileWriter.MAGIC.length; stream.seek(footerLengthIndex); int footerLength = BytesUtils.readIntLittleEndian(stream); stream.seek(footerLengthIndex - footerLength); if (LOG.isInfoEnabled()) {
/**
 * Slices the next {@code length} bytes off the input and returns them as a little-endian buffer.
 *
 * @param length the number of bytes to slice
 * @return the sliced buffer with its byte order set to {@link ByteOrder#LITTLE_ENDIAN}
 * @throws ParquetDecodingException if slicing the input raises an {@link IOException}
 */
private ByteBuffer getBuffer(int length) {
  final ByteBuffer sliced;
  try {
    sliced = in.slice(length);
  } catch (IOException e) {
    throw new ParquetDecodingException("Failed to read " + length + " bytes", e);
  }
  return sliced.order(ByteOrder.LITTLE_ENDIAN);
}
/**
 * Creates the iterator for a column's definition or repetition levels.
 *
 * <p>A {@code NullIntIterator} is returned when {@code maxLevel} is 0, in which case
 * {@code bytes} is not read. Otherwise the level bytes are copied to a byte array and
 * decoded by a {@code RunLengthBitPackingHybridDecoder} whose bit width is derived from
 * {@code maxLevel}.
 *
 * @param maxLevel the maximum level for the column
 * @param bytes the encoded levels
 * @param descriptor the column being read; only used in the error message
 * @return an iterator over the levels
 * @throws IOException if the levels cannot be read; the original exception is kept as the cause
 */
protected static IntIterator createRLEIterator(
    int maxLevel, BytesInput bytes, ColumnDescriptor descriptor) throws IOException {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    final int levelBitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    final ByteArrayInputStream levelStream = new ByteArrayInputStream(bytes.toByteArray());
    final RunLengthBitPackingHybridDecoder decoder =
        new RunLengthBitPackingHybridDecoder(levelBitWidth, levelStream);
    return new RLEIntIterator(decoder);
  } catch (IOException e) {
    throw new IOException("could not read levels in page for col " + descriptor, e);
  }
}
/**
 * Returns an iterator over the definition or repetition levels of a column.
 *
 * <p>If {@code maxLevel} is 0 the result is a {@code NullIntIterator} and {@code bytes} is
 * left unread. For any other value the stream from {@code bytes.toInputStream()} is handed
 * to a {@code RunLengthBitPackingHybridDecoder} configured with the bit width computed
 * from {@code maxLevel}.
 *
 * @param maxLevel the maximum level for the column
 * @param bytes the encoded levels
 * @param descriptor the column being read; only used in the error message
 * @return an iterator over the levels
 * @throws IOException if the levels cannot be read; the original exception is kept as the cause
 */
protected static IntIterator createRLEIterator(
    int maxLevel, BytesInput bytes, ColumnDescriptor descriptor) throws IOException {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    final int widthInBits = BytesUtils.getWidthFromMaxInt(maxLevel);
    final RunLengthBitPackingHybridDecoder hybridDecoder =
        new RunLengthBitPackingHybridDecoder(widthInBits, bytes.toInputStream());
    return new RLEIntIterator(hybridDecoder);
  } catch (IOException e) {
    throw new IOException("could not read levels in page for col " + descriptor, e);
  }
}
@Override public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { this.in = in; if (fixedWidth) { // initialize for repetition and definition levels if (readLength) { int length = readIntLittleEndian(); this.in = in.sliceStream(length); } } else { // initialize for values if (in.available() > 0) { init(in.read()); } } if (bitWidth == 0) { // 0 bit width, treat this as an RLE run of valueCount number of 0's. this.mode = MODE.RLE; this.currentCount = valueCount; this.currentValue = 0; } else { this.currentCount = 0; } }
/**
 * Configures this decoder for values of the given bit width.
 *
 * <p>After validating the width, records it together with the padded byte count and a
 * little-endian byte packer for that width.
 *
 * @param bitWidth the width of each value in bits; must be between 0 and 32 inclusive
 */
private void init(int bitWidth) {
  Preconditions.checkArgument(
      bitWidth >= 0 && bitWidth <= 32,
      "bitWidth must be >= 0 and <= 32");

  final int width = bitWidth;
  this.bitWidth = width;
  this.bytesWidth = BytesUtils.paddedByteCountFromBits(width);
  this.packer = Packer.LITTLE_ENDIAN.newBytePacker(width);
}
/**
 * Returns a little-endian view of the next {@code length} bytes of the input.
 *
 * @param length the number of bytes to slice from the input
 * @return the slice, with {@link ByteOrder#LITTLE_ENDIAN} byte order applied
 * @throws ParquetDecodingException if slicing the input raises an {@link IOException}
 */
private ByteBuffer getBuffer(int length) {
  ByteBuffer chunk;
  try {
    chunk = in.slice(length);
  } catch (IOException e) {
    throw new ParquetDecodingException("Failed to read " + length + " bytes", e);
  }
  chunk = chunk.order(ByteOrder.LITTLE_ENDIAN);
  return chunk;
}
/**
 * Prepares the internal state needed to decode ints of {@code bitWidth} bits.
 *
 * <p>The width is validated first; the fields for the bit width, the padded byte count and
 * the little-endian byte packer are then assigned.
 *
 * @param bitWidth the width of each value in bits; must be between 0 and 32 inclusive
 */
private void init(int bitWidth) {
  final boolean valid = !(bitWidth < 0 || bitWidth > 32);
  Preconditions.checkArgument(valid, "bitWidth must be >= 0 and <= 32");

  this.bitWidth = bitWidth;
  this.bytesWidth = BytesUtils.paddedByteCountFromBits(bitWidth);
  this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
}
/**
 * Reads the next varint encoded int.
 *
 * <p>Bytes are consumed from the input one at a time. The low 7 bits of each byte are
 * OR-ed into the result at an increasing 7-bit offset, and a set high bit (0x80) means
 * another byte follows.
 *
 * @return the decoded value
 * @throws IOException if the input fails, or (as a {@link java.io.EOFException}) if the
 *     input reports end of stream before the value is complete
 */
private int readUnsignedVarInt() throws IOException {
  int value = 0;
  int shift = 0;
  int b;
  do {
    b = in.read();
    if (b < 0) {
      // A negative read result is the InputStream end-of-stream marker. Without this
      // guard -1 would look like a continuation byte (0x80 set) and the loop would never
      // terminate on truncated input.
      throw new java.io.EOFException("Reached end of stream while reading a varint");
    }
    value |= (b & 0x7F) << shift;
    shift += 7;
  } while ((b & 0x80) != 0);
  return value;
}
/**
 * Reads the next varint encoded int.
 *
 * <p>Each byte read from the input contributes its low 7 bits to the result, placed 7 bits
 * higher than the previous byte's contribution. Reading stops at the first byte whose high
 * bit (0x80) is clear.
 *
 * @return the decoded value
 * @throws IOException if the input fails, or (as a {@link java.io.EOFException}) if the
 *     input reports end of stream before the value is complete
 */
private int readUnsignedVarInt() throws IOException {
  int value = 0;
  int shift = 0;
  int b;
  do {
    b = in.read();
    if (b < 0) {
      // End-of-stream marker from read(): fail fast instead of treating -1 as a
      // continuation byte, which would spin forever on truncated input.
      throw new java.io.EOFException("Reached end of stream while reading a varint");
    }
    value |= (b & 0x7F) << shift;
    shift += 7;
  } while ((b & 0x80) != 0);
  return value;
}
@Override public final boolean readBoolean() { // TODO: vectorize decoding and keep boolean[] instead of currentByte if (bitOffset == 0) { try { currentByte = (byte) in.read(); } catch (IOException e) { throw new ParquetDecodingException("Failed to read a byte", e); } } boolean v = (currentByte & (1 << bitOffset)) != 0; bitOffset += 1; if (bitOffset == 8) { bitOffset = 0; } return v; }
@Override public final boolean readBoolean() { // TODO: vectorize decoding and keep boolean[] instead of currentByte if (bitOffset == 0) { try { currentByte = (byte) in.read(); } catch (IOException e) { throw new ParquetDecodingException("Failed to read a byte", e); } } boolean v = (currentByte & (1 << bitOffset)) != 0; bitOffset += 1; if (bitOffset == 8) { bitOffset = 0; } return v; }