/**
 * Attempts to decode a dictionary page from the start of {@code data}.
 *
 * <p>A page header is parsed from the beginning of the array; the bytes that
 * immediately follow the header are treated as the compressed page payload.
 *
 * @param data raw bytes beginning with a serialized page header
 * @param codecName codec handed to {@code decompress} for the payload
 * @return the decoded dictionary page, or an empty {@code Optional} when the
 *         header is not of type {@code DICTIONARY_PAGE} or an
 *         {@code IOException} is raised while reading
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName)
{
    ByteArrayInputStream headerStream = new ByteArrayInputStream(data);
    try {
        PageHeader header = Util.readPageHeader(headerStream);
        if (header.type == PageType.DICTIONARY_PAGE) {
            // Whatever the header parser consumed is the header; the payload starts right after it.
            int payloadOffset = data.length - headerStream.available();
            Slice compressedPayload = wrappedBuffer(data, payloadOffset, header.getCompressed_page_size());

            DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
            String encodingName = dictionaryHeader.getEncoding().name();
            ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(encodingName));
            int entryCount = dictionaryHeader.getNum_values();

            return Optional.of(new DictionaryPage(
                    decompress(codecName, compressedPayload, header.getUncompressed_page_size()),
                    entryCount,
                    encoding));
        }
        return Optional.empty();
    }
    catch (IOException ignored) {
        // Best effort: an unreadable page is reported as "no dictionary".
        return Optional.empty();
    }
}
/**
 * Reads page headers one after another until the number of values collected
 * from data pages reaches the value count recorded in the column chunk
 * metadata, then wraps the result in a {@code PageReader}.
 *
 * <p>Data pages (V1 and V2) are appended to the page list, at most one
 * dictionary page is retained, and any other page type is skipped by its
 * compressed size.
 *
 * @return a reader over the collected data pages and the dictionary page
 *         (which is {@code null} if none was encountered)
 * @throws ParquetCorruptionException if a second dictionary page is found
 * @throws IOException declared by this method; propagated from the underlying reads
 */
public PageReader readAllPages()
        throws IOException
{
    List<DataPage> dataPages = new ArrayList<>();
    DictionaryPage dictionary = null;
    long valuesRead = 0;

    while (valuesRead < descriptor.getColumnChunkMetaData().getValueCount()) {
        PageHeader header = readPageHeader();
        int uncompressedSize = header.getUncompressed_page_size();
        int compressedSize = header.getCompressed_page_size();

        switch (header.type) {
            case DICTIONARY_PAGE: {
                if (dictionary != null) {
                    throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor());
                }
                dictionary = readDictionaryPage(header, uncompressedSize, compressedSize);
                break;
            }
            case DATA_PAGE: {
                valuesRead += readDataPageV1(header, uncompressedSize, compressedSize, dataPages);
                break;
            }
            case DATA_PAGE_V2: {
                valuesRead += readDataPageV2(header, uncompressedSize, compressedSize, dataPages);
                break;
            }
            default: {
                // Page types not handled above contribute no values; step over their bytes.
                skip(compressedSize);
                break;
            }
        }
    }

    return new PageReader(descriptor.getColumnChunkMetaData().getCodec(), dataPages, dictionary);
}
/**
 * Returns the current value of the requested field as an {@code Object}.
 *
 * <p>The three {@code int}-valued fields (uncompressed size, compressed size,
 * CRC) are boxed with {@link Integer#valueOf(int)} rather than the deprecated
 * {@code Integer} constructor, so small values reuse cached instances instead
 * of allocating on every call.
 *
 * @param field selector for the field to read
 * @return the value produced by the matching getter
 * @throws IllegalStateException if {@code field} matches none of the handled constants
 */
public Object getFieldValue(_Fields field)
{
    switch (field) {
        case TYPE:
            return getType();
        case UNCOMPRESSED_PAGE_SIZE:
            return Integer.valueOf(getUncompressed_page_size());
        case COMPRESSED_PAGE_SIZE:
            return Integer.valueOf(getCompressed_page_size());
        case CRC:
            return Integer.valueOf(getCrc());
        case DATA_PAGE_HEADER:
            return getData_page_header();
        case INDEX_PAGE_HEADER:
            return getIndex_page_header();
        case DICTIONARY_PAGE_HEADER:
            return getDictionary_page_header();
        case DATA_PAGE_HEADER_V2:
            return getData_page_header_v2();
    }
    throw new IllegalStateException();
}
/**
 * Looks up a field by its selector and returns its current value.
 *
 * <p>Primitive {@code int} fields are wrapped via {@link Integer#valueOf(int)};
 * the {@code new Integer(int)} constructor used previously is deprecated and
 * always allocates a fresh object.
 *
 * @param field which field to fetch
 * @return the result of the corresponding getter, boxed when it is an {@code int}
 * @throws IllegalStateException if no case handles {@code field}
 */
public Object getFieldValue(_Fields field)
{
    switch (field) {
        case TYPE:
            return getType();
        case UNCOMPRESSED_PAGE_SIZE:
            return Integer.valueOf(getUncompressed_page_size());
        case COMPRESSED_PAGE_SIZE:
            return Integer.valueOf(getCompressed_page_size());
        case CRC:
            return Integer.valueOf(getCrc());
        case DATA_PAGE_HEADER:
            return getData_page_header();
        case INDEX_PAGE_HEADER:
            return getIndex_page_header();
        case DICTIONARY_PAGE_HEADER:
            return getDictionary_page_header();
        case DATA_PAGE_HEADER_V2:
            return getData_page_header_v2();
    }
    throw new IllegalStateException();
}
private static DictionaryPage readDictionaryPage(byte[] data, ParquetCodecFactory codecFactory, CompressionCodecName codecName) { try { ByteArrayInputStream inputStream = new ByteArrayInputStream(data); PageHeader pageHeader = Util.readPageHeader(inputStream); if (pageHeader.type != PageType.DICTIONARY_PAGE) { return null; } // todo this wrapper is not needed BytesInput compressedData = BytesInput.from(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size()); BytesDecompressor decompressor = codecFactory.getDecompressor(codecName); BytesInput decompressed = decompressor.decompress(compressedData, pageHeader.getUncompressed_page_size()); DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); Encoding encoding = Encoding.valueOf(dicHeader.getEncoding().name()); int dictionarySize = dicHeader.getNum_values(); return new DictionaryPage(decompressed, dictionarySize, encoding); } catch (IOException ignored) { return null; } }
PageHeader pageHeader = readPageHeader(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); switch (pageHeader.type) { case DICTIONARY_PAGE:
PageHeader pageHeader = readPageHeader(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); switch (pageHeader.type) { case DICTIONARY_PAGE: