/**
 * Maps the Thrift-format encodings to their parquet.column counterparts by name.
 *
 * @param encodings format-level encodings reported for a column chunk
 * @return an unmodifiable set of the corresponding column-level encodings
 */
private static Set<parquet.column.Encoding> readEncodings(List<Encoding> encodings)
{
    Set<parquet.column.Encoding> result = new HashSet<>();
    encodings.forEach(formatEncoding -> result.add(parquet.column.Encoding.valueOf(formatEncoding.name())));
    return Collections.unmodifiableSet(result);
}
/**
 * Attempts to read a dictionary page from the start of a column chunk's raw bytes.
 * Best-effort: returns {@link Optional#empty()} when the chunk does not begin with
 * a dictionary page or when the header cannot be parsed.
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName)
{
    try {
        ByteArrayInputStream input = new ByteArrayInputStream(data);
        PageHeader header = Util.readPageHeader(input);
        if (header.type != PageType.DICTIONARY_PAGE) {
            return Optional.empty();
        }
        // Bytes consumed so far by readPageHeader = offset of the page body within data
        int pageBodyOffset = data.length - input.available();
        Slice compressed = wrappedBuffer(data, pageBodyOffset, header.getCompressed_page_size());
        DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
        ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name()));
        return Optional.of(new DictionaryPage(
                decompress(codecName, compressed, header.getUncompressed_page_size()),
                dictionaryHeader.getNum_values(),
                encoding));
    }
    catch (IOException ignored) {
        // Deliberately swallowed: an unreadable page is treated as "no dictionary page"
        return Optional.empty();
    }
}
/**
 * Decodes a V1 data page header, appends the page to {@code pages}, and returns
 * the number of values the page contains.
 */
private long readDataPageV1(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, List<DataPage> pages)
{
    DataPageHeader header = pageHeader.getData_page_header();
    ParquetEncoding repetitionEncoding = getParquetEncoding(Encoding.valueOf(header.getRepetition_level_encoding().name()));
    ParquetEncoding definitionEncoding = getParquetEncoding(Encoding.valueOf(header.getDefinition_level_encoding().name()));
    ParquetEncoding valuesEncoding = getParquetEncoding(Encoding.valueOf(header.getEncoding().name()));
    pages.add(new DataPageV1(
            getSlice(compressedPageSize),
            header.getNum_values(),
            uncompressedPageSize,
            MetadataReader.readStats(header.getStatistics(), descriptor.getColumnDescriptor().getType()),
            repetitionEncoding,
            definitionEncoding,
            valuesEncoding));
    return header.getNum_values();
}
/**
 * Builds a DictionaryPage from an already-parsed page header, consuming
 * {@code compressedPageSize} bytes of page data from the underlying input.
 */
private DictionaryPage readDictionaryPage(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize)
{
    DictionaryPageHeader dictionaryHeader = pageHeader.getDictionary_page_header();
    Slice pageData = getSlice(compressedPageSize);
    ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name()));
    return new DictionaryPage(pageData, uncompressedPageSize, dictionaryHeader.getNum_values(), encoding);
}
/**
 * Decodes a V2 data page header, appends the page to {@code pages}, and returns
 * the number of values the page contains. The three getSlice calls must stay in
 * this order (repetition levels, definition levels, values) because each consumes
 * bytes from the underlying input.
 */
private long readDataPageV2(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, List<DataPage> pages)
{
    DataPageHeaderV2 header = pageHeader.getData_page_header_v2();
    int repetitionLength = header.getRepetition_levels_byte_length();
    int definitionLength = header.getDefinition_levels_byte_length();
    // The values section is whatever remains after the two level sections
    int dataSize = compressedPageSize - repetitionLength - definitionLength;
    pages.add(new DataPageV2(
            header.getNum_rows(),
            header.getNum_nulls(),
            header.getNum_values(),
            getSlice(repetitionLength),
            getSlice(definitionLength),
            getParquetEncoding(Encoding.valueOf(header.getEncoding().name())),
            getSlice(dataSize),
            uncompressedPageSize,
            MetadataReader.readStats(header.getStatistics(), descriptor.getColumnDescriptor().getType()),
            header.isIs_compressed()));
    return header.getNum_values();
}
}
/**
 * Converts the Thrift-format encodings into the equivalent parquet.column
 * encodings, matching them by enum constant name.
 *
 * @return an unmodifiable set of the converted encodings
 */
private static Set<parquet.column.Encoding> readEncodings(List<Encoding> encodings)
{
    Set<parquet.column.Encoding> converted = new HashSet<>();
    for (Encoding formatEncoding : encodings) {
        parquet.column.Encoding columnEncoding = parquet.column.Encoding.valueOf(formatEncoding.name());
        converted.add(columnEncoding);
    }
    return Collections.unmodifiableSet(converted);
}
/**
 * Translates a Thrift-format encoding to its parquet.column equivalent by name.
 */
public parquet.column.Encoding getEncoding(Encoding encoding)
{
    String encodingName = encoding.name();
    return parquet.column.Encoding.valueOf(encodingName);
}
/**
 * Decodes a V1 data page header, appends the page to {@code pages}, and returns
 * the number of values the page contains.
 *
 * @throws IOException if the page bytes cannot be read from the input
 */
private long readDataPageV1(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, List<DataPage> pages)
        throws IOException
{
    DataPageHeader header = pageHeader.getData_page_header();
    Encoding repetitionEncoding = Encoding.valueOf(header.getRepetition_level_encoding().name());
    Encoding definitionEncoding = Encoding.valueOf(header.getDefinition_level_encoding().name());
    Encoding valuesEncoding = Encoding.valueOf(header.getEncoding().name());
    pages.add(new DataPageV1(
            getBytesInput(compressedPageSize),
            header.getNum_values(),
            uncompressedPageSize,
            ParquetMetadataReader.readStats(header.getStatistics(), descriptor.getColumnDescriptor().getType()),
            repetitionEncoding,
            definitionEncoding,
            valuesEncoding));
    return header.getNum_values();
}
/**
 * Builds a DictionaryPage from an already-parsed page header, consuming
 * {@code compressedPageSize} bytes of page data from the underlying input.
 *
 * @throws IOException if the page bytes cannot be read
 */
private DictionaryPage readDictionaryPage(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize)
        throws IOException
{
    DictionaryPageHeader dictionaryHeader = pageHeader.getDictionary_page_header();
    BytesInput pageBytes = getBytesInput(compressedPageSize);
    Encoding encoding = Encoding.valueOf(dictionaryHeader.getEncoding().name());
    return new DictionaryPage(pageBytes, uncompressedPageSize, dictionaryHeader.getNum_values(), encoding);
}
/**
 * Attempts to read and decompress a dictionary page from the start of a column
 * chunk's raw bytes. Returns {@code null} when the chunk does not begin with a
 * dictionary page, or when reading fails (best-effort behavior).
 */
private static DictionaryPage readDictionaryPage(byte[] data, ParquetCodecFactory codecFactory, CompressionCodecName codecName)
{
    try {
        ByteArrayInputStream inputStream = new ByteArrayInputStream(data);
        PageHeader pageHeader = Util.readPageHeader(inputStream);
        if (pageHeader.type != PageType.DICTIONARY_PAGE) {
            return null;
        }
        // todo this wrapper is not needed
        // Bytes consumed by readPageHeader = offset of the page body within data
        BytesInput compressedData = BytesInput.from(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size());
        BytesDecompressor decompressor = codecFactory.getDecompressor(codecName);
        BytesInput decompressed = decompressor.decompress(compressedData, pageHeader.getUncompressed_page_size());
        DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
        Encoding encoding = Encoding.valueOf(dicHeader.getEncoding().name());
        int dictionarySize = dicHeader.getNum_values();
        return new DictionaryPage(decompressed, dictionarySize, encoding);
    }
    catch (IOException ignored) {
        // Deliberately swallowed: an unreadable page is treated as "no dictionary page"
        return null;
    }
}
/**
 * Decodes a V2 data page header, appends the page to {@code pages}, and returns
 * the number of values the page contains. The three getBytesInput calls must stay
 * in this order (repetition levels, definition levels, values) because each
 * consumes bytes from the underlying input.
 *
 * @throws IOException if the page bytes cannot be read
 */
private long readDataPageV2(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, List<DataPage> pages)
        throws IOException
{
    DataPageHeaderV2 header = pageHeader.getData_page_header_v2();
    int repetitionLength = header.getRepetition_levels_byte_length();
    int definitionLength = header.getDefinition_levels_byte_length();
    // The values section is whatever remains after the two level sections
    int dataSize = compressedPageSize - repetitionLength - definitionLength;
    pages.add(new DataPageV2(
            header.getNum_rows(),
            header.getNum_nulls(),
            header.getNum_values(),
            getBytesInput(repetitionLength),
            getBytesInput(definitionLength),
            Encoding.valueOf(header.getEncoding().name()),
            getBytesInput(dataSize),
            uncompressedPageSize,
            ParquetMetadataReader.readStats(header.getStatistics(), descriptor.getColumnDescriptor().getType()),
            header.isIs_compressed()));
    return header.getNum_values();
}
}