/**
 * Reads one zig-zag encoded varint and returns the signed value it represents.
 * The raw bits come from {@code readUnsignedVarInt}; decoding uses the
 * shift-and-xor form described in
 * https://developers.google.com/protocol-buffers/docs/encoding
 *
 * @param in an input stream
 * @return the value of a zig-zag varint read from the current position in the stream
 * @throws IOException if there is an exception while reading
 */
public static int readZigZagVarInt(InputStream in) throws IOException {
  final int encoded = readUnsignedVarInt(in);
  // Bits 31..1 hold the magnitude; the unsigned shift keeps bit 31 from being smeared.
  final int magnitude = encoded >>> 1;
  // Bit 0 holds the sign: 0 yields a mask of all zeros, 1 a mask of all ones.
  final int signMask = -(encoded & 1);
  return magnitude ^ signMask;
}
/**
 * Reads one zig-zag encoded varint and returns the signed value it represents.
 * The raw bits come from {@code readUnsignedVarInt}; decoding uses the
 * shift-and-xor form described in
 * https://developers.google.com/protocol-buffers/docs/encoding
 *
 * @param in an input stream
 * @return the value of a zig-zag varint read from the current position in the stream
 * @throws IOException if there is an exception while reading
 */
public static int readZigZagVarInt(InputStream in) throws IOException {
  final int encoded = readUnsignedVarInt(in);
  // Bits 31..1 hold the magnitude; the unsigned shift keeps bit 31 from being smeared.
  final int magnitude = encoded >>> 1;
  // Bit 0 holds the sign: 0 yields a mask of all zeros, 1 a mask of all ones.
  final int signMask = -(encoded & 1);
  return magnitude ^ signMask;
}
/**
 * Builds a {@code DeltaBinaryPackingConfig} from two unsigned varints read
 * back to back from the stream.
 *
 * @param in stream positioned at the first of the two varints
 * @return a config constructed from the first and second varint, in that order
 * @throws IOException if reading either varint fails
 */
public static DeltaBinaryPackingConfig readConfig(InputStream in) throws IOException {
  // The stream is consumed sequentially, so these two reads must stay in this order.
  final int firstField = BytesUtils.readUnsignedVarInt(in);
  final int secondField = BytesUtils.readUnsignedVarInt(in);
  return new DeltaBinaryPackingConfig(firstField, secondField);
}
/**
 * Builds a {@code DeltaBinaryPackingConfig} from two unsigned varints read
 * back to back from the stream.
 *
 * @param in stream positioned at the first of the two varints
 * @return a config constructed from the first and second varint, in that order
 * @throws IOException if reading either varint fails
 */
public static DeltaBinaryPackingConfig readConfig(InputStream in) throws IOException {
  // The stream is consumed sequentially, so these two reads must stay in this order.
  final int firstField = BytesUtils.readUnsignedVarInt(in);
  final int secondField = BytesUtils.readUnsignedVarInt(in);
  return new DeltaBinaryPackingConfig(firstField, secondField);
}
private void readNext() throws IOException { Preconditions.checkArgument(in.available() > 0, "Reading past RLE/BitPacking stream."); final int header = BytesUtils.readUnsignedVarInt(in); mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; switch (mode) { case RLE: currentCount = header >>> 1; LOG.debug("reading {} values RLE", currentCount); currentValue = BytesUtils.readIntLittleEndianPaddedOnBitWidth(in, bitWidth); break; case PACKED: int numGroups = header >>> 1; currentCount = numGroups * 8; LOG.debug("reading {} values BIT PACKED", currentCount); currentBuffer = new int[currentCount]; // TODO: reuse a buffer byte[] bytes = new byte[numGroups * bitWidth]; // At the end of the file RLE data though, there might not be that many bytes left. int bytesToRead = (int)Math.ceil(currentCount * bitWidth / 8.0); bytesToRead = Math.min(bytesToRead, in.available()); new DataInputStream(in).readFully(bytes, 0, bytesToRead); for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) { packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex); } break; default: throw new ParquetDecodingException("not a valid mode " + mode); } } }
private void readNext() throws IOException { Preconditions.checkArgument(in.available() > 0, "Reading past RLE/BitPacking stream."); final int header = BytesUtils.readUnsignedVarInt(in); mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; switch (mode) { case RLE: currentCount = header >>> 1; LOG.debug("reading {} values RLE", currentCount); currentValue = BytesUtils.readIntLittleEndianPaddedOnBitWidth(in, bitWidth); break; case PACKED: int numGroups = header >>> 1; currentCount = numGroups * 8; LOG.debug("reading {} values BIT PACKED", currentCount); currentBuffer = new int[currentCount]; // TODO: reuse a buffer byte[] bytes = new byte[numGroups * bitWidth]; // At the end of the file RLE data though, there might not be that many bytes left. int bytesToRead = (int)Math.ceil(currentCount * bitWidth / 8.0); bytesToRead = Math.min(bytesToRead, in.available()); new DataInputStream(in).readFully(bytes, 0, bytesToRead); for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) { packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex); } break; default: throw new ParquetDecodingException("not a valid mode " + mode); } } }
/** * eagerly loads all the data into memory */ @Override public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IOException { this.in = stream; this.config = DeltaBinaryPackingConfig.readConfig(in); this.totalValueCount = BytesUtils.readUnsignedVarInt(in); allocateValuesBuffer(); bitWidths = new int[config.miniBlockNumInABlock]; //read first value from header valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarLong(in); while (valuesBuffered < totalValueCount) { //values Buffered could be more than totalValueCount, since we flush on a mini block basis loadNewBlockToBuffer(); } }
/** * eagerly loads all the data into memory */ @Override public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IOException { this.in = stream; long startPos = in.position(); this.config = DeltaBinaryPackingConfig.readConfig(in); this.totalValueCount = BytesUtils.readUnsignedVarInt(in); allocateValuesBuffer(); bitWidths = new int[config.miniBlockNumInABlock]; //read first value from header valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarLong(in); while (valuesBuffered < totalValueCount) { //values Buffered could be more than totalValueCount, since we flush on a mini block basis loadNewBlockToBuffer(); } updateNextOffset((int) (in.position() - startPos)); }