/**
 * Prepares this reader to consume a v2 data page.
 *
 * <p>Stores the page's value count, builds the repetition- and definition-level iterators
 * from the page's level sections, logs the data size, and initialises the data reader from
 * the page's data section.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if an {@link IOException} is raised while logging the
 *     page size or initialising the data reader; the original exception is kept as the cause
 */
private void readPageV2(DataPageV2 page) {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn =
      newRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn =
      newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels());
  try {
    final String sizeSummary =
        "page data size " + page.getData().size() + " bytes and " + pageValueCount + " records";
    LOG.debug(sizeSummary);
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount());
  } catch (IOException cause) {
    final String failure = "could not read page " + page + " in col " + descriptor;
    throw new ParquetDecodingException(failure, cause);
  }
}
/**
 * Reports whether the column has a maximum repetition level above zero.
 *
 * @return {@code true} when {@code column.getMaxRepetitionLevel()} is greater than zero
 */
public boolean isRepeated() {
  final boolean hasRepetition = 0 < column.getMaxRepetitionLevel();
  return hasRepetition;
}
/**
 * Initialises the level readers and the data reader for a v2 data page.
 *
 * @param page the v2 data page to read
 * @throws IOException if reading the page's level or data bytes fails; a failure raised
 *     while initialising the data reader is rethrown with the page and column in the message
 */
private void readPageV2(DataPageV2 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn = createRLEIterator(
      descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor);

  // The definition-level reader is sized from the column's maximum definition level.
  final int definitionBitWidth =
      BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(definitionBitWidth);
  this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn);
  this.defColumn.initFromBuffer(this.pageValueCount, page.getDefinitionLevels().toByteArray());

  try {
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException cause) {
    final String failure = "could not read page " + page + " in col " + descriptor;
    throw new IOException(failure, cause);
  }
}
}
private void readPageV2(DataPageV2 page) throws IOException { this.pageValueCount = page.getValueCount(); this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. this.defColumn = new VectorizedRleValuesReader(bitWidth, false); this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn); this.defColumn.initFromPage( this.pageValueCount, page.getDefinitionLevels().toInputStream()); try { initDataReader(page.getDataEncoding(), page.getData().toInputStream()); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } }
private void readPageV2(DataPageV2 page) throws IOException { this.pageValueCount = page.getValueCount(); this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor); int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. this.defColumn = new VectorizedRleValuesReader(bitWidth, false); this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn); this.defColumn.initFromPage( this.pageValueCount, page.getDefinitionLevels().toInputStream()); try { initDataReader(page.getDataEncoding(), page.getData().toInputStream()); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } } }
/**
 * Maps a column's maximum repetition and definition levels to a data mode.
 *
 * @param column the column descriptor to classify
 * @return {@code REPEATED} when the max repetition level is above zero; otherwise
 *     {@code REQUIRED} when the max definition level is zero, and {@code OPTIONAL} in all
 *     remaining cases
 */
private TypeProtos.DataMode getDataMode(ColumnDescriptor column) {
  if (column.getMaxRepetitionLevel() > 0) {
    return TypeProtos.DataMode.REPEATED;
  }
  if (column.getMaxDefinitionLevel() == 0) {
    return TypeProtos.DataMode.REQUIRED;
  }
  return TypeProtos.DataMode.OPTIONAL;
}
/**
 * Derives the data mode of a column from its maximum repetition and definition levels.
 *
 * @param column the column descriptor to classify
 * @return {@code REPEATED} if the max repetition level is positive, else {@code REQUIRED}
 *     if the max definition level is zero, else {@code OPTIONAL}
 */
private static TypeProtos.DataMode getDataMode(ColumnDescriptor column) {
  final boolean repeated = column.getMaxRepetitionLevel() > 0;
  if (repeated) {
    return TypeProtos.DataMode.REPEATED;
  }
  return column.getMaxDefinitionLevel() == 0
      ? TypeProtos.DataMode.REQUIRED
      : TypeProtos.DataMode.OPTIONAL;
}
/**
 * Creates a values writer for the repetition levels of the given column.
 *
 * @param path descriptor of the column whose maximum repetition level sizes the writer
 * @return the writer produced by {@code newColumnDescriptorValuesWriter} for that level
 */
public ValuesWriter newRepetitionLevelWriter(ColumnDescriptor path) {
  final int maxRepetitionLevel = path.getMaxRepetitionLevel();
  return newColumnDescriptorValuesWriter(maxRepetitionLevel);
}
/**
 * Builds the writer used for a column's repetition levels.
 *
 * @param path the column descriptor; only its maximum repetition level is read here
 * @return the result of {@code newColumnDescriptorValuesWriter} for that maximum level
 */
public ValuesWriter newRepetitionLevelWriter(ColumnDescriptor path) {
  final int levelCeiling = path.getMaxRepetitionLevel();
  final ValuesWriter writer = newColumnDescriptorValuesWriter(levelCeiling);
  return writer;
}
/**
 * Creates a level encoder for the repetition levels of the given column.
 *
 * @param path descriptor of the column whose maximum repetition level is passed on
 * @return the encoder produced by {@code newLevelEncoder} for that level
 */
public RunLengthBitPackingHybridEncoder newRepetitionLevelEncoder(ColumnDescriptor path) {
  final int maxRepetitionLevel = path.getMaxRepetitionLevel();
  return newLevelEncoder(maxRepetitionLevel);
}
/**
 * Builds the encoder used for a column's repetition levels.
 *
 * @param path the column descriptor; only its maximum repetition level is read here
 * @return the result of {@code newLevelEncoder} for that maximum level
 */
public RunLengthBitPackingHybridEncoder newRepetitionLevelEncoder(ColumnDescriptor path) {
  final int levelCeiling = path.getMaxRepetitionLevel();
  final RunLengthBitPackingHybridEncoder encoder = newLevelEncoder(levelCeiling);
  return encoder;
}
/**
 * Chooses the repetition-level writer for a column.
 *
 * @param props properties used to create the repetition-level encoder
 * @param path descriptor of the column being written
 * @return {@code NULL_WRITER} when the column's maximum repetition level is zero; otherwise
 *     a new {@code RLEWriterForV2} wrapping the encoder obtained from {@code props}
 */
@Override
ValuesWriter createRLWriter(ParquetProperties props, ColumnDescriptor path) {
  if (path.getMaxRepetitionLevel() == 0) {
    return NULL_WRITER;
  }
  return new RLEWriterForV2(props.newRepetitionLevelEncoder(path));
}
/**
 * Returns the maximum level associated with a kind of values in a column.
 *
 * @param descriptor the column descriptor to query
 * @param valuesType which kind of values the level is wanted for
 * @return the column's max repetition level for {@code REPETITION_LEVEL}, its max definition
 *     level for {@code DEFINITION_LEVEL}, or {@code 1} for {@code VALUES} of a
 *     {@code BOOLEAN} column
 * @throws ParquetDecodingException for {@code VALUES} of a non-boolean column and for any
 *     other values type
 */
static int getMaxLevel(ColumnDescriptor descriptor, ValuesType valuesType) {
  switch (valuesType) {
    case REPETITION_LEVEL:
      return descriptor.getMaxRepetitionLevel();
    case DEFINITION_LEVEL:
      return descriptor.getMaxDefinitionLevel();
    case VALUES:
      if (descriptor.getType() == BOOLEAN) {
        return 1;
      }
      // Non-boolean value columns are unsupported: leave the switch and fail below.
      break;
    default:
      break;
  }
  throw new ParquetDecodingException("Unsupported values type: " + valuesType);
}
/**
 * Looks up the maximum level for the requested kind of values.
 *
 * @param descriptor the column descriptor to query
 * @param valuesType the kind of values: repetition levels, definition levels, or values
 * @return the max repetition level, the max definition level, or {@code 1} when
 *     {@code valuesType} is {@code VALUES} and the column type is {@code BOOLEAN}
 * @throws ParquetDecodingException when no level is defined for the combination, i.e.
 *     {@code VALUES} on a non-boolean column or an unlisted values type
 */
static int getMaxLevel(ColumnDescriptor descriptor, ValuesType valuesType) {
  switch (valuesType) {
    case REPETITION_LEVEL:
      return descriptor.getMaxRepetitionLevel();

    case DEFINITION_LEVEL:
      return descriptor.getMaxDefinitionLevel();

    case VALUES:
      if (descriptor.getType() == BOOLEAN) {
        return 1;
      }
      break; // not boolean: handled by the throw after the switch

    default:
      break; // unlisted values type: handled by the throw after the switch
  }
  final String failure = "Unsupported values type: " + valuesType;
  throw new ParquetDecodingException(failure);
}
/**
 * Creates a descriptor combining an existing column descriptor with a primitive type.
 *
 * <p>The path and the maximum repetition/definition levels come from {@code descriptor};
 * the primitive type name and type length come from {@code primitiveType}.
 *
 * @param descriptor source of the path and the maximum levels
 * @param primitiveType the primitive type to retain; the column is flagged as required
 *     whenever this type's repetition is not {@code OPTIONAL}
 */
public RichColumnDescriptor(ColumnDescriptor descriptor, PrimitiveType primitiveType) {
  super(
      descriptor.getPath(),
      primitiveType.getPrimitiveTypeName(),
      primitiveType.getTypeLength(),
      descriptor.getMaxRepetitionLevel(),
      descriptor.getMaxDefinitionLevel());
  this.required = !(primitiveType.getRepetition() == OPTIONAL);
  this.primitiveType = primitiveType;
}
/**
 * Builds a rich descriptor from a plain column descriptor and its primitive type.
 *
 * @param descriptor supplies the column path and the maximum repetition and definition
 *     levels handed to the superclass constructor
 * @param primitiveType supplies the primitive type name and type length handed to the
 *     superclass constructor, and is stored on this instance; {@code required} is set to
 *     {@code true} unless its repetition is {@code OPTIONAL}
 */
public RichColumnDescriptor(
    ColumnDescriptor descriptor,
    PrimitiveType primitiveType) {
  super(descriptor.getPath(),
      primitiveType.getPrimitiveTypeName(),
      primitiveType.getTypeLength(),
      descriptor.getMaxRepetitionLevel(),
      descriptor.getMaxDefinitionLevel());
  this.required = OPTIONAL != primitiveType.getRepetition();
  this.primitiveType = primitiveType;
}
/**
 * Initialises this reader's state from a v2 data page.
 *
 * <p>Stores the page's value count as the triple count, builds the repetition- and
 * definition-level iterators, logs the page size, and initialises the data reader.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if initialising the data reader raises an
 *     {@link IOException}; the original exception is kept as the cause
 */
private void initFromPage(DataPageV2 page) {
  this.triplesCount = page.getValueCount();
  this.repetitionLevels =
      newRLEIterator(desc.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevels =
      newRLEIterator(desc.getMaxDefinitionLevel(), page.getDefinitionLevels());
  LOG.debug(
      "page data size {} bytes and {} records",
      page.getData().size(),
      triplesCount);
  try {
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), triplesCount);
  } catch (IOException cause) {
    final String failure = "could not read page " + page + " in col " + desc;
    throw new ParquetDecodingException(failure, cause);
  }
}
/**
 * Prepares this reader to consume a v2 data page.
 *
 * <p>Builds the repetition- and definition-level iterators from the page, logs the page's
 * data size together with the current {@code pageValueCount} field, and initialises the
 * data reader with the page's own value count.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if initialising the data reader raises an
 *     {@link IOException}; the original exception is kept as the cause
 */
private void readPageV2(DataPageV2 page) {
  this.repetitionLevelColumn =
      newRLEIterator(path.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn =
      newRLEIterator(path.getMaxDefinitionLevel(), page.getDefinitionLevels());
  // NOTE(review): pageValueCount is not assigned in this method; presumably the caller
  // sets it before dispatching here — confirm.
  LOG.debug(
      "page data size {} bytes and {} records",
      page.getData().size(),
      pageValueCount);
  try {
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), page.getValueCount());
  } catch (IOException cause) {
    final String failure = "could not read page " + page + " in col " + path;
    throw new ParquetDecodingException(failure, cause);
  }
}
/**
 * Prepares this reader to consume a v2 data page and then signals that the page is ready.
 *
 * <p>Builds the repetition- and definition-level iterators, logs the page's data size and
 * value count, initialises the data reader, and finally calls {@code newPageInitialized}.
 *
 * @param page the v2 data page to read
 * @throws ParquetDecodingException if initialising the data reader raises an
 *     {@link IOException}; in that case {@code newPageInitialized} is not called
 */
private void readPageV2(DataPageV2 page) {
  this.repetitionLevelColumn =
      newRLEIterator(path.getMaxRepetitionLevel(), page.getRepetitionLevels());
  this.definitionLevelColumn =
      newRLEIterator(path.getMaxDefinitionLevel(), page.getDefinitionLevels());

  final int valuesInPage = page.getValueCount();
  LOG.debug(
      "page data size {} bytes and {} values",
      page.getData().size(),
      valuesInPage);
  try {
    initDataReader(page.getDataEncoding(), page.getData().toInputStream(), valuesInPage);
  } catch (IOException cause) {
    final String failure = "could not read page " + page + " in col " + path;
    throw new ParquetDecodingException(failure, cause);
  }
  newPageInitialized(page);
}
/**
 * Sets up repetition levels, definition levels and the value reader for a v2 data page,
 * feeding the readers from byte arrays copied out of the page.
 *
 * @param page the v2 data page to read
 * @throws IOException if the page's level or data bytes cannot be read; when the data
 *     reader fails to initialise, the exception is wrapped with the page and column in its
 *     message
 */
private void readPageV2(DataPageV2 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn =
      createRLEIterator(
          descriptor.getMaxRepetitionLevel(), page.getRepetitionLevels(), descriptor);

  final int levelBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(levelBitWidth);
  this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn);
  this.defColumn.initFromBuffer(
      this.pageValueCount,
      page.getDefinitionLevels().toByteArray());

  try {
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException ioFailure) {
    final String message = "could not read page " + page + " in col " + descriptor;
    throw new IOException(message, ioFailure);
  }
}
}