@Override public DictionaryPage readDictionaryPage() { if (dictionaryPage == null) { PageHeader pageHeader = new PageHeader(); long pos = 0; try { pos = in.getPos(); pageHeader = Util.readPageHeader(in); if (pageHeader.getDictionary_page_header() == null) { in.seek(pos); return null; } dictionaryPage = readDictionaryPageHelper(pageHeader); } catch (Exception e) { throw new RuntimeException("Error reading dictionary page." + "\nFile path: " + path.toUri().getPath() + "\nRow count: " + rowCount + "\nColumn Chunk Metadata: " + metaData + "\nPage Header: " + pageHeader + "\nFile offset: " + fileOffset + "\nSize: " + size + "\nValue read so far: " + valueReadSoFar + "\nPosition: " + pos, e); } } return dictionaryPage; }
private DictionaryPage readDictionaryPageHelper(PageHeader pageHeader) throws IOException { ByteBuffer data = uncompressPage(pageHeader, false); return new DictionaryPage( BytesInput.from(data, 0, pageHeader.uncompressed_page_size), pageHeader.getDictionary_page_header().getNum_values(), parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding) ); }
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) { try { ByteArrayInputStream inputStream = new ByteArrayInputStream(data); PageHeader pageHeader = Util.readPageHeader(inputStream); if (pageHeader.type != PageType.DICTIONARY_PAGE) { return Optional.empty(); } Slice compressedData = wrappedBuffer(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size()); DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name())); int dictionarySize = dicHeader.getNum_values(); return Optional.of(new DictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding)); } catch (IOException ignored) { return Optional.empty(); } }
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) { try { ByteArrayInputStream inputStream = new ByteArrayInputStream(data); PageHeader pageHeader = Util.readPageHeader(inputStream); if (pageHeader.type != PageType.DICTIONARY_PAGE) { return Optional.empty(); } Slice compressedData = wrappedBuffer(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size()); DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name())); int dictionarySize = dicHeader.getNum_values(); return Optional.of(new DictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding)); } catch (IOException ignored) { return Optional.empty(); } }
public static Dictionary readDictionary(FSDataInputStream in, ColumnDescriptor column, PageHeaderWithOffset pageHeader, BytesDecompressor decompressor) throws IOException { in.seek(pageHeader.getOffset()); final byte[] data = new byte[pageHeader.getPageHeader().getCompressed_page_size()]; int read = in.read(data); if (read != data.length) { throw new IOException(format("Failed to read dictionary page, read %d bytes, expected %d", read, data.length)); } final DictionaryPage dictionaryPage = new DictionaryPage( decompressor.decompress(BytesInput.from(data), pageHeader.getPageHeader().getUncompressed_page_size()), pageHeader.getPageHeader().getDictionary_page_header().getNum_values(), CONVERTER.getEncoding(pageHeader.getPageHeader().getDictionary_page_header().getEncoding())); return dictionaryPage.getEncoding().initDictionary(column, dictionaryPage); }
pos = in.getPos(); pageHeader = Util.readPageHeader(in); if (pageHeader.getDictionary_page_header() == null) { in.seek(pos); return null; new DictionaryPage( decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()), pageHeader.getDictionary_page_header().getNum_values(), parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding) );
private DictionaryPage readDictionaryPage(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize) { DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); return new DictionaryPage( getSlice(compressedPageSize), uncompressedPageSize, dicHeader.getNum_values(), getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name()))); }
private DictionaryPage readDictionaryPage(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize) { DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); return new DictionaryPage( getSlice(compressedPageSize), uncompressedPageSize, dicHeader.getNum_values(), getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name()))); }
public Object getFieldValue(_Fields field) { switch (field) { case TYPE: return getType(); case UNCOMPRESSED_PAGE_SIZE: return getUncompressed_page_size(); case COMPRESSED_PAGE_SIZE: return getCompressed_page_size(); case CRC: return getCrc(); case DATA_PAGE_HEADER: return getData_page_header(); case INDEX_PAGE_HEADER: return getIndex_page_header(); case DICTIONARY_PAGE_HEADER: return getDictionary_page_header(); case DATA_PAGE_HEADER_V2: return getData_page_header_v2(); } throw new IllegalStateException(); }
private DictionaryPage readCompressedDictionary( PageHeader pageHeader, SeekableInputStream fin) throws IOException { DictionaryPageHeader dictHeader = pageHeader.getDictionary_page_header(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); byte [] dictPageBytes = new byte[compressedPageSize]; fin.readFully(dictPageBytes); BytesInput bin = BytesInput.from(dictPageBytes); return new DictionaryPage( bin, uncompressedPageSize, dictHeader.getNum_values(), converter.getEncoding(dictHeader.getEncoding())); }
public java.lang.Object getFieldValue(_Fields field) { switch (field) { case TYPE: return getType(); case UNCOMPRESSED_PAGE_SIZE: return getUncompressed_page_size(); case COMPRESSED_PAGE_SIZE: return getCompressed_page_size(); case CRC: return getCrc(); case DATA_PAGE_HEADER: return getData_page_header(); case INDEX_PAGE_HEADER: return getIndex_page_header(); case DICTIONARY_PAGE_HEADER: return getDictionary_page_header(); case DATA_PAGE_HEADER_V2: return getData_page_header_v2(); case BLOOM_FILTER_PAGE_HEADER: return getBloom_filter_page_header(); } throw new java.lang.IllegalStateException(); }
private DictionaryPage readCompressedDictionary( PageHeader pageHeader, SeekableInputStream fin) throws IOException { DictionaryPageHeader dictHeader = pageHeader.getDictionary_page_header(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); byte [] dictPageBytes = new byte[compressedPageSize]; fin.readFully(dictPageBytes); BytesInput bin = BytesInput.from(dictPageBytes); return new DictionaryPage( bin, uncompressedPageSize, dictHeader.getNum_values(), converter.getEncoding(dictHeader.getEncoding())); }
if (pageHeader.getType() == PageType.DICTIONARY_PAGE) { readStatus.setIsDictionaryPage(true); valuesRead += pageHeader.getDictionary_page_header().getNum_values(); } else { valuesRead += pageHeader.getData_page_header().getNum_values();
throw new ParquetDecodingException("more than one dictionary page in column " + descriptor.col); DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); dictionaryPage = new DictionaryPage(
throw new ParquetDecodingException("more than one dictionary page in column " + descriptor.col); DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); dictionaryPage = new DictionaryPage(