/**
 * Translates a list of {@code Encoding} values into the equally named
 * {@code org.apache.parquet.column.Encoding} constants.
 *
 * <p>Each element is mapped through its {@code name()}; duplicates collapse because the
 * result is collected into a set.
 *
 * @param encodings the encodings to translate; every element must be non-null
 * @return an unmodifiable set containing the translated encodings
 */
private static Set<org.apache.parquet.column.Encoding> readEncodings(List<Encoding> encodings) {
    Set<org.apache.parquet.column.Encoding> translated = new HashSet<>();
    for (Encoding source : encodings) {
        // The two types are bridged purely by constant name.
        String constantName = source.name();
        translated.add(org.apache.parquet.column.Encoding.valueOf(constantName));
    }
    return Collections.unmodifiableSet(translated);
}
/**
 * Translates a list of {@code Encoding} values into the equally named
 * {@code org.apache.parquet.column.Encoding} constants.
 *
 * <p>Each element is mapped through its {@code name()}; duplicates collapse because the
 * result is collected into a set.
 *
 * @param encodings the encodings to translate; every element must be non-null
 * @return an unmodifiable set containing the translated encodings
 */
private static Set<org.apache.parquet.column.Encoding> readEncodings(List<Encoding> encodings) {
    Set<org.apache.parquet.column.Encoding> translated = new HashSet<>();
    for (Encoding source : encodings) {
        // The two types are bridged purely by constant name.
        String constantName = source.name();
        translated.add(org.apache.parquet.column.Encoding.valueOf(constantName));
    }
    return Collections.unmodifiableSet(translated);
}
/**
 * Maps an {@code Encoding} to the {@code org.apache.parquet.column.Encoding} constant
 * that carries the same name.
 *
 * @param encoding the encoding to translate; must be non-null
 * @return the column encoding looked up by {@code encoding.name()}
 */
public org.apache.parquet.column.Encoding getEncoding(Encoding encoding) {
    String constantName = encoding.name();
    return org.apache.parquet.column.Encoding.valueOf(constantName);
}
/**
 * Maps an {@code Encoding} to the {@code org.apache.parquet.column.Encoding} constant
 * that carries the same name.
 *
 * @param encoding the encoding to translate; must be non-null
 * @return the column encoding looked up by {@code encoding.name()}
 */
public org.apache.parquet.column.Encoding getEncoding(Encoding encoding) {
    String constantName = encoding.name();
    return org.apache.parquet.column.Encoding.valueOf(constantName);
}
/**
 * Attempts to read a dictionary page from the start of a raw byte array.
 *
 * <p>The page header is parsed first; the page payload is taken to begin at the first
 * byte the header parser did not consume and to span the header's compressed page size.
 *
 * @param data raw bytes beginning with a page header
 * @param codecName codec handed to {@code decompress} for the payload
 * @return the decoded dictionary page, or an empty {@code Optional} when the header is
 *         not of type {@code DICTIONARY_PAGE} or an {@code IOException} occurs
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) {
    try {
        ByteArrayInputStream headerStream = new ByteArrayInputStream(data);
        PageHeader header = Util.readPageHeader(headerStream);
        if (header.type != PageType.DICTIONARY_PAGE) {
            return Optional.empty();
        }

        // Whatever the header parser consumed is the offset of the payload.
        int payloadOffset = data.length - headerStream.available();
        Slice payload = wrappedBuffer(data, payloadOffset, header.getCompressed_page_size());

        DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
        ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name()));
        int entryCount = dictionaryHeader.getNum_values();

        return Optional.of(new DictionaryPage(
                decompress(codecName, payload, header.getUncompressed_page_size()),
                entryCount,
                encoding));
    }
    catch (IOException ignored) {
        // Best effort: an unreadable page is reported as "no dictionary".
        return Optional.empty();
    }
}
/**
 * Attempts to read a dictionary page from the start of a raw byte array.
 *
 * <p>The page header is parsed first; the page payload is taken to begin at the first
 * byte the header parser did not consume and to span the header's compressed page size.
 *
 * @param data raw bytes beginning with a page header
 * @param codecName codec handed to {@code decompress} for the payload
 * @return the decoded dictionary page, or an empty {@code Optional} when the header is
 *         not of type {@code DICTIONARY_PAGE} or an {@code IOException} occurs
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) {
    try {
        ByteArrayInputStream headerStream = new ByteArrayInputStream(data);
        PageHeader header = Util.readPageHeader(headerStream);
        if (header.type != PageType.DICTIONARY_PAGE) {
            return Optional.empty();
        }

        // Whatever the header parser consumed is the offset of the payload.
        int payloadOffset = data.length - headerStream.available();
        Slice payload = wrappedBuffer(data, payloadOffset, header.getCompressed_page_size());

        DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
        ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name()));
        int entryCount = dictionaryHeader.getNum_values();

        return Optional.of(new DictionaryPage(
                decompress(codecName, payload, header.getUncompressed_page_size()),
                entryCount,
                encoding));
    }
    catch (IOException ignored) {
        // Best effort: an unreadable page is reported as "no dictionary".
        return Optional.empty();
    }
}
/**
 * Reads the dictionary page described by {@code pageHeader} and installs the resulting
 * dictionary in {@code this.dictionary}.
 *
 * <p>The buffer returned by {@code readPage} is recorded in
 * {@code allocatedDictionaryBuffers} (presumably so it can be released later; confirm
 * against the owner of that collection).
 *
 * @param pageHeader header of the dictionary page; supplies sizes, value count and encoding
 * @param parentStatus column reader whose {@code columnDescriptor} is used to initialise
 *        the dictionary
 * @throws IOException declared by the signature; presumably raised while reading the page
 */
private void readDictionaryPage(final PageHeader pageHeader, final ColumnReader<?> parentStatus) throws IOException {
    int compressedLength = pageHeader.getCompressed_page_size();
    int uncompressedLength = pageHeader.getUncompressed_page_size();

    final DrillBuf pageBuffer = readPage(pageHeader, compressedLength, uncompressedLength);
    allocatedDictionaryBuffers.add(pageBuffer);

    DictionaryPage dictionaryPage = new DictionaryPage(
            asBytesInput(pageBuffer, 0, uncompressedLength),
            pageHeader.uncompressed_page_size,
            pageHeader.dictionary_page_header.num_values,
            valueOf(pageHeader.dictionary_page_header.encoding.name()));

    // The page's own encoding decides which dictionary implementation is built.
    this.dictionary = dictionaryPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictionaryPage);
}
/**
 * Builds a {@code DataPageV1} from the V1 data page header and appends it to {@code pages}.
 *
 * @param pageHeader page header carrying the V1 data page header
 * @param uncompressedPageSize uncompressed size passed through to the page
 * @param compressedPageSize number of bytes requested from {@code getSlice} for the page body
 * @param pages list that receives the newly created page
 * @return the value count reported by the V1 data page header
 */
private long readDataPageV1(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, List<DataPage> pages) {
    DataPageHeader header = pageHeader.getData_page_header();

    // Argument order is kept as-is: getSlice(...) is evaluated before any header accessors.
    DataPageV1 dataPage = new DataPageV1(
            getSlice(compressedPageSize),
            header.getNum_values(),
            uncompressedPageSize,
            MetadataReader.readStats(
                    header.getStatistics(),
                    descriptor.getColumnDescriptor().getType()),
            getParquetEncoding(Encoding.valueOf(header.getRepetition_level_encoding().name())),
            getParquetEncoding(Encoding.valueOf(header.getDefinition_level_encoding().name())),
            getParquetEncoding(Encoding.valueOf(header.getEncoding().name())));
    pages.add(dataPage);

    return header.getNum_values();
}
/**
 * Builds a {@code DataPageV1} from the V1 data page header and appends it to {@code pages}.
 *
 * @param pageHeader page header carrying the V1 data page header
 * @param uncompressedPageSize uncompressed size passed through to the page
 * @param compressedPageSize number of bytes requested from {@code getSlice} for the page body
 * @param pages list that receives the newly created page
 * @return the value count reported by the V1 data page header
 */
private long readDataPageV1(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, List<DataPage> pages) {
    DataPageHeader header = pageHeader.getData_page_header();

    // Argument order is kept as-is: getSlice(...) is evaluated before any header accessors.
    DataPageV1 dataPage = new DataPageV1(
            getSlice(compressedPageSize),
            header.getNum_values(),
            uncompressedPageSize,
            MetadataReader.readStats(
                    header.getStatistics(),
                    descriptor.getColumnDescriptor().getType()),
            getParquetEncoding(Encoding.valueOf(header.getRepetition_level_encoding().name())),
            getParquetEncoding(Encoding.valueOf(header.getDefinition_level_encoding().name())),
            getParquetEncoding(Encoding.valueOf(header.getEncoding().name())));
    pages.add(dataPage);

    return header.getNum_values();
}
/**
 * Creates a {@code DictionaryPage} from the dictionary page header and the next
 * {@code compressedPageSize} bytes obtained via {@code getSlice}.
 *
 * @param pageHeader page header carrying the dictionary page header
 * @param uncompressedPageSize uncompressed size passed through to the page
 * @param compressedPageSize number of bytes requested from {@code getSlice}
 * @return the dictionary page with the header's value count and translated encoding
 */
private DictionaryPage readDictionaryPage(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize) {
    DictionaryPageHeader dictionaryHeader = pageHeader.getDictionary_page_header();
    DictionaryPage dictionaryPage = new DictionaryPage(
            getSlice(compressedPageSize),
            uncompressedPageSize,
            dictionaryHeader.getNum_values(),
            getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name())));
    return dictionaryPage;
}
/**
 * Creates a {@code DictionaryPage} from the dictionary page header and the next
 * {@code compressedPageSize} bytes obtained via {@code getSlice}.
 *
 * @param pageHeader page header carrying the dictionary page header
 * @param uncompressedPageSize uncompressed size passed through to the page
 * @param compressedPageSize number of bytes requested from {@code getSlice}
 * @return the dictionary page with the header's value count and translated encoding
 */
private DictionaryPage readDictionaryPage(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize) {
    DictionaryPageHeader dictionaryHeader = pageHeader.getDictionary_page_header();
    DictionaryPage dictionaryPage = new DictionaryPage(
            getSlice(compressedPageSize),
            uncompressedPageSize,
            dictionaryHeader.getNum_values(),
            getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name())));
    return dictionaryPage;
}
/**
 * Decodes the dictionary page held by {@code readStatus} and installs the resulting
 * dictionary in {@code this.dictionary}.
 *
 * <p>Also updates the {@code pageHeader} field from {@code readStatus}, records the
 * decompressed buffer in {@code allocatedDictionaryBuffers}, and adds the elapsed decode
 * time (nanoseconds, measured from after decompression) to
 * {@code stats.timeDictPageDecode}.
 *
 * @param readStatus source of the page header and the page data
 * @param parentStatus column reader whose {@code columnDescriptor} is used to initialise
 *        the dictionary
 * @throws UserException declared by the signature; any exception raised here is passed to
 *         {@code handleAndThrowException}, which presumably rethrows it in this form
 */
private void readDictionaryPageData(final ReadStatus readStatus, final ColumnReader<?> parentStatus) throws UserException {
    try {
        pageHeader = readStatus.getPageHeader();
        int uncompressedLength = pageHeader.getUncompressed_page_size();
        final DrillBuf pageBuffer = getDecompressedPageData(readStatus);

        // Timing starts only after decompression, so it covers dictionary decoding alone.
        Stopwatch decodeTimer = Stopwatch.createStarted();
        allocatedDictionaryBuffers.add(pageBuffer);

        DictionaryPage dictionaryPage = new DictionaryPage(
                asBytesInput(pageBuffer, 0, uncompressedLength),
                pageHeader.uncompressed_page_size,
                pageHeader.dictionary_page_header.num_values,
                valueOf(pageHeader.dictionary_page_header.encoding.name()));
        this.dictionary = dictionaryPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictionaryPage);

        long decodeNanos = decodeTimer.elapsed(TimeUnit.NANOSECONDS);
        stats.timeDictPageDecode.addAndGet(decodeNanos);
    } catch (Exception e) {
        handleAndThrowException(e, "Error decoding dictionary page.");
    }
}
/**
 * Reads the dictionary page described by {@code pageHeader} into a freshly allocated
 * buffer and installs the resulting dictionary in {@code this.dictionary}.
 *
 * @param pageHeader header of the dictionary page; supplies sizes, value count and encoding
 * @param parentStatus column reader whose {@code columnDescriptor} is used to initialise
 *        the dictionary
 * @throws IOException declared by the signature; presumably raised while reading the page
 */
private void readDictionaryPage(final PageHeader pageHeader, final ColumnReader<?> parentStatus) throws IOException {
    int compressedLength = pageHeader.getCompressed_page_size();
    int uncompressedLength = pageHeader.getUncompressed_page_size();

    // The destination buffer is sized for the uncompressed page before the read begins.
    final ArrowBuf pageBuffer = allocateDictionaryBuffer(uncompressedLength);
    readPage(pageHeader, compressedLength, uncompressedLength, pageBuffer);

    DictionaryPage dictionaryPage = new DictionaryPage(
            asBytesInput(pageBuffer, 0, uncompressedLength),
            pageHeader.uncompressed_page_size,
            pageHeader.dictionary_page_header.num_values,
            valueOf(pageHeader.dictionary_page_header.encoding.name()));

    // The page's own encoding decides which dictionary implementation is built.
    this.dictionary = dictionaryPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictionaryPage);
}
/**
 * Builds a {@code DataPageV2} from the V2 data page header and appends it to {@code pages}.
 *
 * <p>Three slices are requested from {@code getSlice} in order: repetition levels,
 * definition levels, then the data section, whose length is the compressed page size
 * minus the two level lengths.
 *
 * @param pageHeader page header carrying the V2 data page header
 * @param uncompressedPageSize uncompressed size passed through to the page
 * @param compressedPageSize compressed size of the whole page, levels included
 * @param pages list that receives the newly created page
 * @return the value count reported by the V2 data page header
 */
private long readDataPageV2(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, List<DataPage> pages) {
    DataPageHeaderV2 header = pageHeader.getData_page_header_v2();
    int repetitionLevelsLength = header.getRepetition_levels_byte_length();
    int definitionLevelsLength = header.getDefinition_levels_byte_length();
    int dataLength = compressedPageSize - repetitionLevelsLength - definitionLevelsLength;

    // Slices must be requested in this order: repetition levels, definition levels, data.
    DataPageV2 dataPage = new DataPageV2(
            header.getNum_rows(),
            header.getNum_nulls(),
            header.getNum_values(),
            getSlice(repetitionLevelsLength),
            getSlice(definitionLevelsLength),
            getParquetEncoding(Encoding.valueOf(header.getEncoding().name())),
            getSlice(dataLength),
            uncompressedPageSize,
            MetadataReader.readStats(
                    header.getStatistics(),
                    descriptor.getColumnDescriptor().getType()),
            header.isIs_compressed());
    pages.add(dataPage);

    return header.getNum_values();
}
// Closes an enclosing scope whose opening lies outside this excerpt.
}
/**
 * Builds a {@code DataPageV2} from the V2 data page header and appends it to {@code pages}.
 *
 * <p>Three slices are requested from {@code getSlice} in order: repetition levels,
 * definition levels, then the data section, whose length is the compressed page size
 * minus the two level lengths.
 *
 * @param pageHeader page header carrying the V2 data page header
 * @param uncompressedPageSize uncompressed size passed through to the page
 * @param compressedPageSize compressed size of the whole page, levels included
 * @param pages list that receives the newly created page
 * @return the value count reported by the V2 data page header
 */
private long readDataPageV2(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize, List<DataPage> pages) {
    DataPageHeaderV2 header = pageHeader.getData_page_header_v2();
    int repetitionLevelsLength = header.getRepetition_levels_byte_length();
    int definitionLevelsLength = header.getDefinition_levels_byte_length();
    int dataLength = compressedPageSize - repetitionLevelsLength - definitionLevelsLength;

    // Slices must be requested in this order: repetition levels, definition levels, data.
    DataPageV2 dataPage = new DataPageV2(
            header.getNum_rows(),
            header.getNum_nulls(),
            header.getNum_values(),
            getSlice(repetitionLevelsLength),
            getSlice(definitionLevelsLength),
            getParquetEncoding(Encoding.valueOf(header.getEncoding().name())),
            getSlice(dataLength),
            uncompressedPageSize,
            MetadataReader.readStats(
                    header.getStatistics(),
                    descriptor.getColumnDescriptor().getType()),
            header.isIs_compressed());
    pages.add(dataPage);

    return header.getNum_values();
}
// Closes an enclosing scope whose opening lies outside this excerpt.
}