private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) { try { ByteArrayInputStream inputStream = new ByteArrayInputStream(data); PageHeader pageHeader = Util.readPageHeader(inputStream); if (pageHeader.type != PageType.DICTIONARY_PAGE) { return Optional.empty(); } Slice compressedData = wrappedBuffer(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size()); DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name())); int dictionarySize = dicHeader.getNum_values(); return Optional.of(new DictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding)); } catch (IOException ignored) { return Optional.empty(); } }
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName) { try { ByteArrayInputStream inputStream = new ByteArrayInputStream(data); PageHeader pageHeader = Util.readPageHeader(inputStream); if (pageHeader.type != PageType.DICTIONARY_PAGE) { return Optional.empty(); } Slice compressedData = wrappedBuffer(data, data.length - inputStream.available(), pageHeader.getCompressed_page_size()); DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header(); ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dicHeader.getEncoding().name())); int dictionarySize = dicHeader.getNum_values(); return Optional.of(new DictionaryPage(decompress(codecName, compressedData, pageHeader.getUncompressed_page_size()), dictionarySize, encoding)); } catch (IOException ignored) { return Optional.empty(); } }
private void readDictionaryPage(final PageHeader pageHeader, final ColumnReader<?> parentStatus) throws IOException { int compressedSize = pageHeader.getCompressed_page_size(); int uncompressedSize = pageHeader.getUncompressed_page_size(); final DrillBuf dictionaryData = readPage(pageHeader, compressedSize, uncompressedSize); allocatedDictionaryBuffers.add(dictionaryData); DictionaryPage page = new DictionaryPage( asBytesInput(dictionaryData, 0, uncompressedSize), pageHeader.uncompressed_page_size, pageHeader.dictionary_page_header.num_values, valueOf(pageHeader.dictionary_page_header.encoding.name())); this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page); }
public Object getFieldValue(_Fields field) { switch (field) { case TYPE: return getType(); case UNCOMPRESSED_PAGE_SIZE: return getUncompressed_page_size(); case COMPRESSED_PAGE_SIZE: return getCompressed_page_size(); case CRC: return getCrc(); case DATA_PAGE_HEADER: return getData_page_header(); case INDEX_PAGE_HEADER: return getIndex_page_header(); case DICTIONARY_PAGE_HEADER: return getDictionary_page_header(); case DATA_PAGE_HEADER_V2: return getData_page_header_v2(); } throw new IllegalStateException(); }
private DictionaryPage readCompressedDictionary( PageHeader pageHeader, SeekableInputStream fin) throws IOException { DictionaryPageHeader dictHeader = pageHeader.getDictionary_page_header(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); byte [] dictPageBytes = new byte[compressedPageSize]; fin.readFully(dictPageBytes); BytesInput bin = BytesInput.from(dictPageBytes); return new DictionaryPage( bin, uncompressedPageSize, dictHeader.getNum_values(), converter.getEncoding(dictHeader.getEncoding())); }
public static Dictionary readDictionary(FSDataInputStream in, ColumnDescriptor column, PageHeaderWithOffset pageHeader, BytesDecompressor decompressor) throws IOException { in.seek(pageHeader.getOffset()); final byte[] data = new byte[pageHeader.getPageHeader().getCompressed_page_size()]; int read = in.read(data); if (read != data.length) { throw new IOException(format("Failed to read dictionary page, read %d bytes, expected %d", read, data.length)); } final DictionaryPage dictionaryPage = new DictionaryPage( decompressor.decompress(BytesInput.from(data), pageHeader.getPageHeader().getUncompressed_page_size()), pageHeader.getPageHeader().getDictionary_page_header().getNum_values(), CONVERTER.getEncoding(pageHeader.getPageHeader().getDictionary_page_header().getEncoding())); return dictionaryPage.getEncoding().initDictionary(column, dictionaryPage); }
public java.lang.Object getFieldValue(_Fields field) { switch (field) { case TYPE: return getType(); case UNCOMPRESSED_PAGE_SIZE: return getUncompressed_page_size(); case COMPRESSED_PAGE_SIZE: return getCompressed_page_size(); case CRC: return getCrc(); case DATA_PAGE_HEADER: return getData_page_header(); case INDEX_PAGE_HEADER: return getIndex_page_header(); case DICTIONARY_PAGE_HEADER: return getDictionary_page_header(); case DATA_PAGE_HEADER_V2: return getData_page_header_v2(); case BLOOM_FILTER_PAGE_HEADER: return getBloom_filter_page_header(); } throw new java.lang.IllegalStateException(); }
private DictionaryPage readCompressedDictionary( PageHeader pageHeader, SeekableInputStream fin) throws IOException { DictionaryPageHeader dictHeader = pageHeader.getDictionary_page_header(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); byte [] dictPageBytes = new byte[compressedPageSize]; fin.readFully(dictPageBytes); BytesInput bin = BytesInput.from(dictPageBytes); return new DictionaryPage( bin, uncompressedPageSize, dictHeader.getNum_values(), converter.getEncoding(dictHeader.getEncoding())); }
private void readDictionaryPage(final PageHeader pageHeader, final ColumnReader<?> parentStatus) throws IOException { int compressedSize = pageHeader.getCompressed_page_size(); int uncompressedSize = pageHeader.getUncompressed_page_size(); final ArrowBuf dictionaryData = allocateDictionaryBuffer(uncompressedSize); readPage(pageHeader, compressedSize, uncompressedSize, dictionaryData); DictionaryPage page = new DictionaryPage( asBytesInput(dictionaryData, 0, uncompressedSize), pageHeader.uncompressed_page_size, pageHeader.dictionary_page_header.num_values, valueOf(pageHeader.dictionary_page_header.encoding.name())); this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page); }
try { PageHeader pageHeader = Util.readPageHeader(parent.dataReader); int compressedSize = pageHeader.getCompressed_page_size(); if ( parent.parentColumnReader.isShuttingDown ) { return null; } //Opportunity to skip expensive Parquet processing pageData = parent.dataReader.getNext(compressedSize);
public PageReader readAllPages() throws IOException { List<DataPage> pages = new ArrayList<>(); DictionaryPage dictionaryPage = null; long valueCount = 0; while (valueCount < descriptor.getColumnChunkMetaData().getValueCount()) { PageHeader pageHeader = readPageHeader(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); switch (pageHeader.type) { case DICTIONARY_PAGE: if (dictionaryPage != null) { throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor()); } dictionaryPage = readDictionaryPage(pageHeader, uncompressedPageSize, compressedPageSize); break; case DATA_PAGE: valueCount += readDataPageV1(pageHeader, uncompressedPageSize, compressedPageSize, pages); break; case DATA_PAGE_V2: valueCount += readDataPageV2(pageHeader, uncompressedPageSize, compressedPageSize, pages); break; default: skip(compressedPageSize); break; } } return new PageReader(descriptor.getColumnChunkMetaData().getCodec(), pages, dictionaryPage); }
public PageReader readAllPages() throws IOException { List<DataPage> pages = new ArrayList<>(); DictionaryPage dictionaryPage = null; long valueCount = 0; while (valueCount < descriptor.getColumnChunkMetaData().getValueCount()) { PageHeader pageHeader = readPageHeader(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); switch (pageHeader.type) { case DICTIONARY_PAGE: if (dictionaryPage != null) { throw new ParquetCorruptionException("%s has more than one dictionary page in column chunk", descriptor.getColumnDescriptor()); } dictionaryPage = readDictionaryPage(pageHeader, uncompressedPageSize, compressedPageSize); break; case DATA_PAGE: valueCount += readDataPageV1(pageHeader, uncompressedPageSize, compressedPageSize, pages); break; case DATA_PAGE_V2: valueCount += readDataPageV2(pageHeader, uncompressedPageSize, compressedPageSize, pages); break; default: skip(compressedPageSize); break; } } return new PageReader(descriptor.getColumnChunkMetaData().getCodec(), pages, dictionaryPage); }
/** * Get the page header and the pageData (uncompressed) for the next page */ protected void nextInternal() throws IOException{ Stopwatch timer = Stopwatch.createUnstarted(); // next, we need to decompress the bytes // TODO - figure out if we need multiple dictionary pages, I believe it may be limited to one // I think we are clobbering parts of the dictionary if there can be multiple pages of dictionary do { long start=dataReader.getPos(); timer.start(); pageHeader = Util.readPageHeader(dataReader); long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS); long pageHeaderBytes=dataReader.getPos()-start; this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes); logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}","Page Header Read","", this.parentColumnReader.parentReader.hadoopPath, this.parentColumnReader.columnDescriptor.toString(), start, 0, 0, timeToRead); timer.reset(); if (pageHeader.getType() == PageType.DICTIONARY_PAGE) { readDictionaryPage(pageHeader, parentColumnReader); } } while (pageHeader.getType() == PageType.DICTIONARY_PAGE); int compressedSize = pageHeader.getCompressed_page_size(); int uncompressedSize = pageHeader.getUncompressed_page_size(); pageData = readPage(pageHeader, compressedSize, uncompressedSize); }
pageHeader = Util.readPageHeader(in); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); switch (pageHeader.type) { case DICTIONARY_PAGE:
private DrillBuf decompress(PageHeader pageHeader, DrillBuf compressedData) { DrillBuf pageDataBuf = null; Stopwatch timer = Stopwatch.createUnstarted(); long timeToRead; int compressedSize = pageHeader.getCompressed_page_size(); int uncompressedSize = pageHeader.getUncompressed_page_size(); pageDataBuf = allocateTemporaryBuffer(uncompressedSize); try { timer.start(); CompressionCodecName codecName = parentColumnReader.columnChunkMetaData.getCodec(); ByteBuffer input = compressedData.nioBuffer(0, compressedSize); ByteBuffer output = pageDataBuf.nioBuffer(0, uncompressedSize); DecompressionHelper decompressionHelper = new DecompressionHelper(codecName); decompressionHelper.decompress(input, compressedSize, output, uncompressedSize); pageDataBuf.writerIndex(uncompressedSize); timeToRead = timer.elapsed(TimeUnit.NANOSECONDS); this.updateStats(pageHeader, "Decompress", 0, timeToRead, compressedSize, uncompressedSize); } catch (IOException e) { handleAndThrowException(e, "Error decompressing data."); } return pageDataBuf; }
PageHeader pageHeader = readPageHeader(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); switch (pageHeader.type) { case DICTIONARY_PAGE:
PageHeader pageHeader = readPageHeader(); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); switch (pageHeader.type) { case DICTIONARY_PAGE:
pageHeader = Util.readPageHeader(in); int uncompressedPageSize = pageHeader.getUncompressed_page_size(); int compressedPageSize = pageHeader.getCompressed_page_size(); switch (pageHeader.type) { case DICTIONARY_PAGE:
int compressedSize = pageHeader.getCompressed_page_size(); int uncompressedSize = pageHeader.getUncompressed_page_size(); readPage(pageHeader, compressedSize, uncompressedSize, pageData);