/** * eagerly load all the data into memory * * @param valueCount count of values in this page * @param page the array to read from containing the page data (repetition levels, definition levels, data) * @param offset where to start reading from in the page * @throws IOException */ @Override public void initFromPage(int valueCount, byte[] page, int offset) throws IOException { in = new ByteArrayInputStream(page, offset, page.length - offset); this.config = DeltaBinaryPackingConfig.readConfig(in); this.page = page; this.totalValueCount = BytesUtils.readUnsignedVarInt(in); allocateValuesBuffer(); bitWidths = new int[config.miniBlockNumInABlock]; //read first value from header valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarInt(in); while (valuesBuffered < totalValueCount) { //values Buffered could be more than totalValueCount, since we flush on a mini block basis loadNewBlockToBuffer(); } this.nextOffset = page.length - in.available(); }
private void loadNewBlockToBuffer() { try { minDeltaInCurrentBlock = BytesUtils.readZigZagVarInt(in); } catch (IOException e) { throw new ParquetDecodingException("can not read min delta in current block", e); } readBitWidthsForMiniBlocks(); // mini block is atomic for reading, we read a mini block when there are more values left int i; for (i = 0; i < config.miniBlockNumInABlock && valuesBuffered < totalValueCount; i++) { BytePacker packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidths[i]); unpackMiniBlock(packer); } //calculate values from deltas unpacked for current block int valueUnpacked=i*config.miniBlockSizeInValues; for (int j = valuesBuffered-valueUnpacked; j < valuesBuffered; j++) { int index = j; valuesBuffer[index] += minDeltaInCurrentBlock + valuesBuffer[index - 1]; } }