/**
 * Initializes this reader from a v2 data page: sets up the RLE iterators for the
 * repetition and definition levels, then hands the data section to the data reader.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void readPageV2(DataPageV2 page) {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn =
      newRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn =
      newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels());
  try {
    // Parameterized logging: the message is only assembled when debug is enabled,
    // consistent with the other readers in this codebase.
    LOG.debug("page data size {} bytes and {} records", page.getData().size(), pageValueCount);
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
/** Returns a human-readable summary of this v2 page, used for logging and debugging. */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder("Page V2 [");
  sb.append("dl size=").append(definitionLevels.size()).append(", ");
  sb.append("rl size=").append(repetitionLevels.size()).append(", ");
  sb.append("data size=").append(data.size()).append(", ");
  sb.append("data enc=").append(dataEncoding).append(", ");
  sb.append("valueCount=").append(getValueCount()).append(", ");
  sb.append("rowCount=").append(getRowCount()).append(", ");
  sb.append("is compressed=").append(isCompressed).append(", ");
  sb.append("uncompressedSize=").append(getUncompressedSize()).append("]");
  return sb.toString();
}
}
/**
 * Builds a v2 data page whose data section is compressed.
 *
 * @param rowCount count of rows
 * @param nullCount count of nulls
 * @param valueCount count of values
 * @param repetitionLevels RLE encoded repetition levels
 * @param definitionLevels RLE encoded definition levels
 * @param dataEncoding encoding for the data
 * @param data data encoded with dataEncoding and compressed
 * @param uncompressedSize total size uncompressed (rl + dl + data)
 * @param statistics optional statistics for this page
 * @return a compressed page
 */
public static DataPageV2 compressed(
    int rowCount,
    int nullCount,
    int valueCount,
    BytesInput repetitionLevels,
    BytesInput definitionLevels,
    Encoding dataEncoding,
    BytesInput data,
    int uncompressedSize,
    Statistics<?> statistics) {
  final boolean isCompressed = true;
  return new DataPageV2(
      rowCount,
      nullCount,
      valueCount,
      repetitionLevels,
      definitionLevels,
      dataEncoding,
      data,
      uncompressedSize,
      statistics,
      isCompressed);
}
/** Describes this v2 page (level sizes, encoding, counts, compression) for debug output. */
@Override
public String toString() {
  String fields = String.join(", ",
      "dl size=" + definitionLevels.size(),
      "rl size=" + repetitionLevels.size(),
      "data size=" + data.size(),
      "data enc=" + dataEncoding,
      "valueCount=" + getValueCount(),
      "rowCount=" + getRowCount(),
      "is compressed=" + isCompressed,
      "uncompressedSize=" + getUncompressedSize());
  return "Page V2 [" + fields + "]";
}
}
/**
 * Factory for a compressed v2 data page.
 *
 * @param rowCount count of rows
 * @param nullCount count of nulls
 * @param valueCount count of values
 * @param repetitionLevels RLE encoded repetition levels
 * @param definitionLevels RLE encoded definition levels
 * @param dataEncoding encoding for the data
 * @param data data encoded with dataEncoding and compressed
 * @param uncompressedSize total size uncompressed (rl + dl + data)
 * @param statistics optional statistics for this page
 * @return a compressed page
 */
public static DataPageV2 compressed(
    int rowCount,
    int nullCount,
    int valueCount,
    BytesInput repetitionLevels,
    BytesInput definitionLevels,
    Encoding dataEncoding,
    BytesInput data,
    int uncompressedSize,
    Statistics<?> statistics) {
  return new DataPageV2(
      rowCount, nullCount, valueCount,
      repetitionLevels, definitionLevels,
      dataEncoding, data,
      uncompressedSize, statistics,
      /* isCompressed= */ true);
}
/**
 * Prepares this reader from a v2 data page: level readers first, then the values reader.
 *
 * @param page the v2 data page to read
 * @throws IOException if the page data cannot be read
 */
private void readPageV2(DataPageV2 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn =
      createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor);
  // The definition levels are decoded with a vectorized RLE reader sized to the max level.
  int defLevelBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(defLevelBitWidth);
  this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn);
  this.defColumn.initFromBuffer(this.pageValueCount, page.getDefinitionLevels().toByteArray());
  try {
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}
}
/**
 * Builds an uncompressed v2 data page; the uncompressed size is derived from the parts.
 *
 * @param rowCount count of rows
 * @param nullCount count of nulls
 * @param valueCount count of values
 * @param firstRowIndex the index of the first row in this page
 * @param repetitionLevels RLE encoded repetition levels
 * @param definitionLevels RLE encoded definition levels
 * @param dataEncoding encoding for the data
 * @param data data encoded with dataEncoding
 * @param statistics optional statistics for this page
 * @return an uncompressed page
 */
public static DataPageV2 uncompressed(
    int rowCount,
    int nullCount,
    int valueCount,
    long firstRowIndex,
    BytesInput repetitionLevels,
    BytesInput definitionLevels,
    Encoding dataEncoding,
    BytesInput data,
    Statistics<?> statistics) {
  // Uncompressed size is simply the sum of the three sections.
  long totalSize = repetitionLevels.size() + definitionLevels.size() + data.size();
  return new DataPageV2(
      rowCount,
      nullCount,
      valueCount,
      firstRowIndex,
      repetitionLevels,
      definitionLevels,
      dataEncoding,
      data,
      Ints.checkedCast(totalSize),
      statistics,
      false);
}
private void readPageV2(DataPageV2 page) throws IOException { this.pageValueCount = page.getValueCount(); this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. this.defColumn = new VectorizedRleValuesReader(bitWidth, false); this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn); this.defColumn.initFromPage( this.pageValueCount, page.getDefinitionLevels().toInputStream()); try { initDataReader(page.getDataEncoding(), page.getData().toInputStream()); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } }
/**
 * Factory for an uncompressed v2 data page.
 *
 * @param rowCount count of rows
 * @param nullCount count of nulls
 * @param valueCount count of values
 * @param repetitionLevels RLE encoded repetition levels
 * @param definitionLevels RLE encoded definition levels
 * @param dataEncoding encoding for the data
 * @param data data encoded with dataEncoding
 * @param statistics optional statistics for this page
 * @return an uncompressed page
 */
public static DataPageV2 uncompressed(
    int rowCount,
    int nullCount,
    int valueCount,
    BytesInput repetitionLevels,
    BytesInput definitionLevels,
    Encoding dataEncoding,
    BytesInput data,
    Statistics<?> statistics) {
  // The uncompressed size of the page is rl + dl + data.
  long uncompressedSize = repetitionLevels.size() + definitionLevels.size() + data.size();
  return new DataPageV2(
      rowCount, nullCount, valueCount,
      repetitionLevels, definitionLevels,
      dataEncoding, data,
      Ints.checkedCast(uncompressedSize),
      statistics,
      /* isCompressed= */ false);
}
private void readPageV2(DataPageV2 page) throws IOException { this.pageValueCount = page.getValueCount(); this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. this.defColumn = new VectorizedRleValuesReader(bitWidth, false); this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn); this.defColumn.initFromPage( this.pageValueCount, page.getDefinitionLevels().toInputStream()); try { initDataReader(page.getDataEncoding(), page.getData().toInputStream()); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } }
/**
 * Builds an uncompressed v2 data page from its parts; the total uncompressed size is
 * computed as the sum of the level and data sections.
 *
 * @param rowCount count of rows
 * @param nullCount count of nulls
 * @param valueCount count of values
 * @param repetitionLevels RLE encoded repetition levels
 * @param definitionLevels RLE encoded definition levels
 * @param dataEncoding encoding for the data
 * @param data data encoded with dataEncoding
 * @param statistics optional statistics for this page
 * @return an uncompressed page
 */
public static DataPageV2 uncompressed(
    int rowCount,
    int nullCount,
    int valueCount,
    BytesInput repetitionLevels,
    BytesInput definitionLevels,
    Encoding dataEncoding,
    BytesInput data,
    Statistics<?> statistics) {
  long size = repetitionLevels.size() + definitionLevels.size() + data.size();
  return new DataPageV2(
      rowCount,
      nullCount,
      valueCount,
      repetitionLevels,
      definitionLevels,
      dataEncoding,
      data,
      Ints.checkedCast(size),
      statistics,
      false);
}
/**
 * Initializes this reader from a v2 data page: level iterators first, then the data reader.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void initFromPage(DataPageV2 page) {
  this.triplesCount = page.getValueCount();
  this.repetitionLevels = newRLEIterator(desc.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevels = newRLEIterator(desc.getMaxDefinitionLevel(), page.getDefinitionLevels());
  BytesInput pageBytes = page.getData();
  LOG.debug("page data size {} bytes and {} records", pageBytes.size(), triplesCount);
  try {
    initDataReader(page.getDataEncoding(), pageBytes.toInputStream(), triplesCount);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + desc, e);
  }
}
BytesInput data = BytesInput.from(byteBuffer.slice()); return new DataPageV2( dataHeaderV2.getNum_rows(), dataHeaderV2.getNum_nulls(),
/**
 * Reads a v2 data page, wiring up the level iterators and the data reader.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void readPageV2(DataPageV2 page) {
  this.repetitionLevelColumn = newRLEIterator(path.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn = newRLEIterator(path.getMaxDefinitionLevel(), page.getDefinitionLevels());
  BytesInput bytes = page.getData();
  LOG.debug("page data size {} bytes and {} records", bytes.size(), pageValueCount);
  try {
    initDataReader(page.getDataEncoding(), bytes.toInputStream(), page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
  }
}
int dataSize = compressedPageSize - dataHeaderV2.getRepetition_levels_byte_length() - dataHeaderV2.getDefinition_levels_byte_length(); pagesInChunk.add( new DataPageV2( dataHeaderV2.getNum_rows(), dataHeaderV2.getNum_nulls(),
/**
 * Reads a v2 data page: sets up level iterators and the data reader, then notifies
 * listeners via {@code newPageInitialized}.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void readPageV2(DataPageV2 page) {
  int valuesInPage = page.getValueCount();
  this.repetitionLevelColumn = newRLEIterator(path.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn = newRLEIterator(path.getMaxDefinitionLevel(), page.getDefinitionLevels());
  LOG.debug("page data size {} bytes and {} values", page.getData().size(), valuesInPage);
  try {
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), valuesInPage);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
  }
  newPageInitialized(page);
}
DataPageHeaderV2 dataHeaderV2 = pageHeader.getData_page_header_v2(); int dataSize = compressedPageSize - dataHeaderV2.getRepetition_levels_byte_length() - dataHeaderV2.getDefinition_levels_byte_length(); return new DataPageV2( dataHeaderV2.getNum_rows(), dataHeaderV2.getNum_nulls(),
/**
 * Initializes the vectorized definition-level reader and the data reader from a v2 page.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if the page data cannot be read
 */
private void readPageV2(DataPageV2 page) {
  this.pageValueCount = page.getValueCount();
  // Bit width of the definition levels follows from the column's max definition level.
  defColumn = new VectorizedDefValuesReader(
      BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()));
  try {
    defColumn.initFromBuffer(this.pageValueCount, page.getDefinitionLevels().toByteArray());
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
int dataSize = compressedPageSize - dataHeaderV2.getRepetition_levels_byte_length() - dataHeaderV2.getDefinition_levels_byte_length(); pagesInChunk.add( new DataPageV2( dataHeaderV2.getNum_rows(), dataHeaderV2.getNum_nulls(),
/**
 * Initializes level readers and the data reader from a v2 data page.
 *
 * @param page the v2 data page to read
 * @throws IOException if the page cannot be read; the exception message carries
 *     the page and column for easier diagnosis
 */
private void readPageV2(DataPageV2 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn =
      createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor);
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(bitWidth);
  this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn);
  try {
    // Decode the definition levels inside the try so a failure here is reported with the
    // same page/column context as a failure in the data reader below.
    this.defColumn.initFromBuffer(this.pageValueCount, page.getDefinitionLevels().toByteArray());
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}
}