public java.lang.Object getFieldValue(_Fields field) {
  switch (field) {
  case TYPE:
    return getType();
  case UNCOMPRESSED_PAGE_SIZE:
    return getUncompressed_page_size();
  case COMPRESSED_PAGE_SIZE:
    return getCompressed_page_size();
  case CRC:
    return getCrc();
  case DATA_PAGE_HEADER:
    return getData_page_header();
  case INDEX_PAGE_HEADER:
    return getIndex_page_header();
  case DICTIONARY_PAGE_HEADER:
    return getDictionary_page_header();
  case DATA_PAGE_HEADER_V2:
    return getData_page_header_v2();
  case BLOOM_FILTER_PAGE_HEADER:
    return getBloom_filter_page_header();
  }
  throw new java.lang.IllegalStateException();
}
@Override
public DictionaryPage readDictionaryPage() {
  if (dictionaryPage == null) {
    PageHeader pageHeader = new PageHeader();
    long pos = 0;
    try {
      pos = in.getPos();
      pageHeader = Util.readPageHeader(in);
      if (pageHeader.getDictionary_page_header() == null) {
        in.seek(pos);
        return null;
      }
      dictionaryPage = readDictionaryPageHelper(pageHeader);
    } catch (Exception e) {
      throw new RuntimeException("Error reading dictionary page."
          + "\nFile path: " + path.toUri().getPath()
          + "\nRow count: " + rowCount
          + "\nColumn Chunk Metadata: " + metaData
          + "\nPage Header: " + pageHeader
          + "\nFile offset: " + fileOffset
          + "\nSize: " + size
          + "\nValue read so far: " + valueReadSoFar
          + "\nPosition: " + pos, e);
    }
  }
  return dictionaryPage;
}
@Override
public int hashCode() {
  int hashCode = 1;

  hashCode = hashCode * 8191 + ((isSetType()) ? 131071 : 524287);
  if (isSetType())
    hashCode = hashCode * 8191 + type.getValue();

  hashCode = hashCode * 8191 + uncompressed_page_size;

  hashCode = hashCode * 8191 + compressed_page_size;

  hashCode = hashCode * 8191 + ((isSetCrc()) ? 131071 : 524287);
  if (isSetCrc())
    hashCode = hashCode * 8191 + crc;

  hashCode = hashCode * 8191 + ((isSetData_page_header()) ? 131071 : 524287);
  if (isSetData_page_header())
    hashCode = hashCode * 8191 + data_page_header.hashCode();

  hashCode = hashCode * 8191 + ((isSetIndex_page_header()) ? 131071 : 524287);
  if (isSetIndex_page_header())
    hashCode = hashCode * 8191 + index_page_header.hashCode();

  hashCode = hashCode * 8191 + ((isSetDictionary_page_header()) ? 131071 : 524287);
  if (isSetDictionary_page_header())
    hashCode = hashCode * 8191 + dictionary_page_header.hashCode();

  hashCode = hashCode * 8191 + ((isSetData_page_header_v2()) ? 131071 : 524287);
  if (isSetData_page_header_v2())
    hashCode = hashCode * 8191 + data_page_header_v2.hashCode();

  hashCode = hashCode * 8191 + ((isSetBloom_filter_page_header()) ? 131071 : 524287);
  if (isSetBloom_filter_page_header())
    hashCode = hashCode * 8191 + bloom_filter_page_header.hashCode();

  return hashCode;
}
/** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */
public boolean isSet(_Fields field) {
  if (field == null) {
    throw new IllegalArgumentException();
  }

  switch (field) {
  case TYPE:
    return isSetType();
  case UNCOMPRESSED_PAGE_SIZE:
    return isSetUncompressed_page_size();
  case COMPRESSED_PAGE_SIZE:
    return isSetCompressed_page_size();
  case CRC:
    return isSetCrc();
  case DATA_PAGE_HEADER:
    return isSetData_page_header();
  case INDEX_PAGE_HEADER:
    return isSetIndex_page_header();
  case DICTIONARY_PAGE_HEADER:
    return isSetDictionary_page_header();
  case DATA_PAGE_HEADER_V2:
    return isSetData_page_header_v2();
  case BLOOM_FILTER_PAGE_HEADER:
    return isSetBloom_filter_page_header();
  }
  throw new IllegalStateException();
}
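Taken together, getFieldValue and isSet give generic access to every header field without naming each accessor. Below is a minimal sketch of dumping whichever fields are set on a header; it assumes the Thrift-generated PageHeader from org.apache.parquet.format and the standard getFieldName() method that Thrift field enums provide, so treat the imports and exact signatures as assumptions rather than verified API.

import org.apache.parquet.format.PageHeader;
import org.apache.parquet.format.PageType;

public class PageHeaderFieldDump {
  public static void main(String[] args) {
    // A header with only the required fields populated (sizes here are made up).
    PageHeader header = new PageHeader(PageType.DATA_PAGE, 4096, 1024);

    // Walk the generated _Fields enum and print each field that has a value.
    for (PageHeader._Fields field : PageHeader._Fields.values()) {
      if (header.isSet(field)) {
        System.out.println(field.getFieldName() + " = " + header.getFieldValue(field));
      }
    }
  }
}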
case TYPE:
  if (value == null) {
    unsetType();
  } else {
    setType((PageType)value);
  }
  break;
case UNCOMPRESSED_PAGE_SIZE:
  if (value == null) {
    unsetUncompressed_page_size();
  } else {
    setUncompressed_page_size((Integer)value);
  }
  break;
case COMPRESSED_PAGE_SIZE:
  if (value == null) {
    unsetCompressed_page_size();
  } else {
    setCompressed_page_size((Integer)value);
  }
  break;
case CRC:
  if (value == null) {
    unsetCrc();
  } else {
    setCrc((Integer)value);
  }
  break;
case DATA_PAGE_HEADER:
  if (value == null) {
    unsetData_page_header();
  } else {
    setData_page_header((DataPageHeader)value);
  }
  break;
case INDEX_PAGE_HEADER:
  if (value == null) {
    unsetIndex_page_header();
  } else {
    setIndex_page_header((IndexPageHeader)value);
  }
  break;
case DICTIONARY_PAGE_HEADER:
  if (value == null) {
    unsetDictionary_page_header();
  } else {
    setDictionary_page_header((DictionaryPageHeader)value);
  }
  break;
while (valuesCountReadSoFar < descriptor.metadata.getValueCount()) {
  PageHeader pageHeader = readPageHeader();
  int uncompressedPageSize = pageHeader.getUncompressed_page_size();
  int compressedPageSize = pageHeader.getCompressed_page_size();
  switch (pageHeader.type) {
    case DICTIONARY_PAGE:
      DictionaryPageHeader dicHeader = pageHeader.getDictionary_page_header();
      dictionaryPage = new DictionaryPage(/* … */);
      break;
    case DATA_PAGE:
      DataPageHeader dataHeaderV1 = pageHeader.getData_page_header();
      pagesInChunk.add(new DataPageV1(/* … */));
      break;
    case DATA_PAGE_V2:
      DataPageHeaderV2 dataHeaderV2 = pageHeader.getData_page_header_v2();
      int dataSize = compressedPageSize
          - dataHeaderV2.getRepetition_levels_byte_length()
          - dataHeaderV2.getDefinition_levels_byte_length();
      pagesInChunk.add(/* … */);
      break;
    default:
      LOG.debug("skipping page of type {} of size {}", pageHeader.getType(), compressedPageSize);
      stream.skipFully(compressedPageSize);
      break;
  }
}
private DictionaryPage readCompressedDictionary(
    PageHeader pageHeader, SeekableInputStream fin) throws IOException {
  DictionaryPageHeader dictHeader = pageHeader.getDictionary_page_header();

  int uncompressedPageSize = pageHeader.getUncompressed_page_size();
  int compressedPageSize = pageHeader.getCompressed_page_size();

  byte[] dictPageBytes = new byte[compressedPageSize];
  fin.readFully(dictPageBytes);

  BytesInput bin = BytesInput.from(dictPageBytes);

  return new DictionaryPage(
      bin, uncompressedPageSize, dictHeader.getNum_values(),
      converter.getEncoding(dictHeader.getEncoding()));
}
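Note that the DictionaryPage built here still wraps the compressed bytes; decompression is left to the caller. A hedged sketch of that follow-up step, in the decompressor style used by the readers further down; the helper name, parameter types, and import packages are assumptions, not code from the excerpt above.

import java.io.IOException;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.page.DictionaryPage;
import org.apache.parquet.compression.CompressionCodecFactory;

class DictionaryPages {
  // Hypothetical helper: decompress a dictionary page produced by readCompressedDictionary-style code.
  static DictionaryPage decompress(DictionaryPage compressed,
      CompressionCodecFactory.BytesInputDecompressor decompressor) throws IOException {
    // decompress(bytes, uncompressedSize) mirrors the decompressor calls in the later snippets.
    BytesInput uncompressed =
        decompressor.decompress(compressed.getBytes(), compressed.getUncompressedSize());
    // Re-wrap with the same value count and encoding so downstream dictionary init can use it.
    return new DictionaryPage(uncompressed, compressed.getDictionarySize(), compressed.getEncoding());
  }
}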
@Override
public DataPage readPage() {
  PageHeader pageHeader = new PageHeader();
  try {
    if (lastPage != null) {
      // …
    }
    // …
    int uncompressedPageSize = pageHeader.getUncompressed_page_size();
    int compressedPageSize = pageHeader.getCompressed_page_size();
    switch (pageHeader.type) {
      case DICTIONARY_PAGE:
        dictionaryPage = new DictionaryPage(
            decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size),
                pageHeader.getUncompressed_page_size()),
            pageHeader.uncompressed_page_size,
            parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding));
        // …
        buffer.flip();
        return new DataPageV1(
            decompressor.decompress(BytesInput.from(buffer), pageHeader.getUncompressed_page_size()),
            pageHeader.data_page_header.num_values,
            pageHeader.uncompressed_page_size,
            // …
        HadoopStreams.wrap(in).readFully(buffer);
        buffer.flip();
        DataPageHeaderV2 dataHeaderV2 = pageHeader.getData_page_header_v2();
        int dataSize = compressedPageSize
            - dataHeaderV2.getRepetition_levels_byte_length()
            - dataHeaderV2.getDefinition_levels_byte_length();
        BytesInput decompressedPageData =
            // …
private void readDictionaryPage(final PageHeader pageHeader,
    final ColumnReader<?> parentStatus) throws IOException {
  int compressedSize = pageHeader.getCompressed_page_size();
  int uncompressedSize = pageHeader.getUncompressed_page_size();

  final DrillBuf dictionaryData = readPage(pageHeader, compressedSize, uncompressedSize);
  allocatedDictionaryBuffers.add(dictionaryData);

  DictionaryPage page = new DictionaryPage(
      asBytesInput(dictionaryData, 0, uncompressedSize),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));

  this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page);
}
try {
  PageHeader pageHeader = Util.readPageHeader(parent.dataReader);
  int compressedSize = pageHeader.getCompressed_page_size();
  if (parent.parentColumnReader.isShuttingDown) {
    return null;
  } // Opportunity to skip expensive Parquet processing
  pageData = parent.dataReader.getNext(compressedSize);
  if (pageHeader.getType() == PageType.DICTIONARY_PAGE) {
    readStatus.setIsDictionaryPage(true);
    valuesRead += pageHeader.getDictionary_page_header().getNum_values();
  } else {
    valuesRead += pageHeader.getData_page_header().getNum_values();
    parent.totalPageValuesRead += valuesRead;
    // …
/**
 * Get the page header and the pageData (uncompressed) for the next page
 */
protected void nextInternal() throws IOException {
  Stopwatch timer = Stopwatch.createUnstarted();
  // next, we need to decompress the bytes
  // TODO - figure out if we need multiple dictionary pages, I believe it may be limited to one
  // I think we are clobbering parts of the dictionary if there can be multiple pages of dictionary
  do {
    long start = dataReader.getPos();
    timer.start();
    pageHeader = Util.readPageHeader(dataReader);
    long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS);
    long pageHeaderBytes = dataReader.getPos() - start;
    this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes);
    logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}", "Page Header Read", "",
        this.parentColumnReader.parentReader.hadoopPath,
        this.parentColumnReader.columnDescriptor.toString(), start, 0, 0, timeToRead);
    timer.reset();
    if (pageHeader.getType() == PageType.DICTIONARY_PAGE) {
      readDictionaryPage(pageHeader, parentColumnReader);
    }
  } while (pageHeader.getType() == PageType.DICTIONARY_PAGE);

  int compressedSize = pageHeader.getCompressed_page_size();
  int uncompressedSize = pageHeader.getUncompressed_page_size();
  pageData = readPage(pageHeader, compressedSize, uncompressedSize);
}
@Override
public DictionaryPage readDictionaryPage() {
  if (dictionaryPage == null) {
    PageHeader pageHeader = new PageHeader();
    long pos = 0;
    try {
      pos = in.getPos();
      pageHeader = Util.readPageHeader(in);
      if (pageHeader.getDictionary_page_header() == null) {
        in.seek(pos);
        return null;
      }
      dictionaryPage = new DictionaryPage(
          decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size),
              pageHeader.getUncompressed_page_size()),
          pageHeader.getDictionary_page_header().getNum_values(),
          parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding));
      // …
private PageHeader newDataPageHeader(
    int uncompressedSize, int compressedSize,
    int valueCount,
    org.apache.parquet.column.statistics.Statistics statistics,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding) {
  PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
  // TODO: pageHeader.crc = ...;
  pageHeader.setData_page_header(new DataPageHeader(
      valueCount,
      getEncoding(valuesEncoding),
      getEncoding(dlEncoding),
      getEncoding(rlEncoding)));
  if (!statistics.isEmpty()) {
    pageHeader.getData_page_header().setStatistics(toParquetStatistics(statistics));
  }
  return pageHeader;
}
public void writeDictionaryPageHeader(
    int uncompressedSize, int compressedSize, int valueCount,
    org.apache.parquet.column.Encoding valuesEncoding, OutputStream to) throws IOException {
  PageHeader pageHeader = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
  pageHeader.setDictionary_page_header(new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding)));
  writePageHeader(pageHeader, to);
}
private PageHeader newDataPageV2Header(
    int uncompressedSize, int compressedSize,
    int valueCount, int nullCount, int rowCount,
    org.apache.parquet.column.Encoding dataEncoding,
    int rlByteLength, int dlByteLength) {
  // TODO: pageHeader.crc = ...;
  DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2(
      valueCount, nullCount, rowCount,
      getEncoding(dataEncoding),
      dlByteLength, rlByteLength);
  PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize);
  pageHeader.setData_page_header_v2(dataPageHeaderV2);
  return pageHeader;
}
private PageHeader newDataPageHeader(
    int uncompressedSize, int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding) {
  PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
  // TODO: pageHeader.crc = ...;
  pageHeader.setData_page_header(new DataPageHeader(
      valueCount,
      getEncoding(valuesEncoding),
      getEncoding(dlEncoding),
      getEncoding(rlEncoding)));
  return pageHeader;
}
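The header builders above all end with a Thrift PageHeader that is serialized via writePageHeader. A minimal round-trip sketch of that flow using the Util helper the readers in this section already call; the package names, the four-argument DataPageHeader constructor, and the Util.writePageHeader signature are stated from memory of the parquet-format classes, so treat them as assumptions.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.parquet.format.DataPageHeader;
import org.apache.parquet.format.Encoding;
import org.apache.parquet.format.PageHeader;
import org.apache.parquet.format.PageType;
import org.apache.parquet.format.Util;

public class PageHeaderRoundTrip {
  public static void main(String[] args) throws IOException {
    // Build a v1 data page header the same way the helpers above do,
    // using the Thrift-generated format classes directly (sizes and counts are made up).
    PageHeader header = new PageHeader(PageType.DATA_PAGE, 4096, 1024);
    header.setData_page_header(new DataPageHeader(100, Encoding.PLAIN, Encoding.RLE, Encoding.RLE));

    // Serialize with the same Util class the readers use for readPageHeader.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    Util.writePageHeader(header, out);

    // Read it back and inspect the deserialized header.
    PageHeader readBack = Util.readPageHeader(new ByteArrayInputStream(out.toByteArray()));
    System.out.println(readBack.getType() + " values=" + readBack.getData_page_header().getNum_values());
  }
}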
private DictionaryPage readDictionaryPageHelper(PageHeader pageHeader) throws IOException {
  ByteBuffer data = uncompressPage(pageHeader, false);
  return new DictionaryPage(
      BytesInput.from(data, 0, pageHeader.uncompressed_page_size),
      pageHeader.getDictionary_page_header().getNum_values(),
      parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding));
}
public PageHeader deepCopy() { return new PageHeader(this); }
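The copy constructor behind deepCopy yields a value-equal but independent instance; together with the generated equals/hashCode shown earlier, the following holds. A small hedged sketch, with made-up field values:

PageHeader original = new PageHeader(PageType.DICTIONARY_PAGE, 2048, 512);
PageHeader copy = original.deepCopy();

// Distinct objects, equal by value.
assert copy != original;
assert copy.equals(original) && copy.hashCode() == original.hashCode();

// Mutating the copy leaves the original untouched.
copy.setCrc(42);
assert !copy.equals(original) && !original.isSetCrc();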
private void readDictionaryPageData(final ReadStatus readStatus, final ColumnReader<?> parentStatus)
    throws UserException {
  try {
    pageHeader = readStatus.getPageHeader();
    int uncompressedSize = pageHeader.getUncompressed_page_size();
    final DrillBuf dictionaryData = getDecompressedPageData(readStatus);
    Stopwatch timer = Stopwatch.createStarted();
    allocatedDictionaryBuffers.add(dictionaryData);
    DictionaryPage page = new DictionaryPage(
        asBytesInput(dictionaryData, 0, uncompressedSize),
        pageHeader.uncompressed_page_size,
        pageHeader.dictionary_page_header.num_values,
        valueOf(pageHeader.dictionary_page_header.encoding.name()));
    this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page);
    long timeToDecode = timer.elapsed(TimeUnit.NANOSECONDS);
    stats.timeDictPageDecode.addAndGet(timeToDecode);
  } catch (Exception e) {
    handleAndThrowException(e, "Error decoding dictionary page.");
  }
}
private long readDataPageV2(PageHeader pageHeader, int uncompressedPageSize, int compressedPageSize,
    List<DataPage> pages) {
  DataPageHeaderV2 dataHeaderV2 = pageHeader.getData_page_header_v2();
  int dataSize = compressedPageSize
      - dataHeaderV2.getRepetition_levels_byte_length()
      - dataHeaderV2.getDefinition_levels_byte_length();
  pages.add(new DataPageV2(
      dataHeaderV2.getNum_rows(),
      dataHeaderV2.getNum_nulls(),
      dataHeaderV2.getNum_values(),
      getSlice(dataHeaderV2.getRepetition_levels_byte_length()),
      getSlice(dataHeaderV2.getDefinition_levels_byte_length()),
      getParquetEncoding(Encoding.valueOf(dataHeaderV2.getEncoding().name())),
      getSlice(dataSize),
      uncompressedPageSize,
      MetadataReader.readStats(
          dataHeaderV2.getStatistics(),
          descriptor.getColumnDescriptor().getType()),
      dataHeaderV2.isIs_compressed()));
  return dataHeaderV2.getNum_values();
}
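The dataSize computation above is the one subtlety of v2 data pages: the repetition and definition level runs are written first and are not compressed, so the value section is whatever remains of the page's compressed size. A quick arithmetic illustration with made-up byte counts:

public class DataPageV2SizeExample {
  public static void main(String[] args) {
    int compressedPageSize = 10_000;     // pageHeader.getCompressed_page_size()
    int repetitionLevelsLength = 120;    // dataHeaderV2.getRepetition_levels_byte_length()
    int definitionLevelsLength = 380;    // dataHeaderV2.getDefinition_levels_byte_length()

    // Levels come first and are stored uncompressed, so the (possibly compressed)
    // value bytes are the remainder of the page.
    int dataSize = compressedPageSize - repetitionLevelsLength - definitionLevelsLength;
    System.out.println(dataSize); // 9500
  }
}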