private List<Encoding> toFormatEncodings(Set<org.apache.parquet.column.Encoding> encodings) {
  // Translate each column-level encoding into its Thrift format counterpart,
  // preserving the iteration order of the input set.
  List<Encoding> formatEncodings = new ArrayList<Encoding>(encodings.size());
  for (org.apache.parquet.column.Encoding columnEncoding : encodings) {
    formatEncodings.add(getEncoding(columnEncoding));
  }
  return formatEncodings;
}
private List<Encoding> toFormatEncodings(Set<org.apache.parquet.column.Encoding> encodings) {
  // Convert the set of column encodings to its Thrift representation.
  // Pre-size the output list: exactly one entry per input encoding.
  final int count = encodings.size();
  final List<Encoding> converted = new ArrayList<Encoding>(count);
  for (org.apache.parquet.column.Encoding e : encodings) {
    Encoding formatEncoding = getEncoding(e);
    converted.add(formatEncoding);
  }
  return converted;
}
Set<org.apache.parquet.column.Encoding> fromFormatEncodings(List<Encoding> encodings) { Set<org.apache.parquet.column.Encoding> converted = new HashSet<org.apache.parquet.column.Encoding>(); for (Encoding encoding : encodings) { converted.add(getEncoding(encoding)); } // make converted unmodifiable, drop reference to modifiable copy converted = Collections.unmodifiableSet(converted); // atomically update the cache Set<org.apache.parquet.column.Encoding> cached = cachedEncodingSets.putIfAbsent(converted, converted); if (cached == null) { // cached == null signifies that converted was *not* in the cache previously // so we can return converted instead of throwing it away, it has now // been cached cached = converted; } return cached; }
Set<org.apache.parquet.column.Encoding> fromFormatEncodings(List<Encoding> encodings) { Set<org.apache.parquet.column.Encoding> converted = new HashSet<org.apache.parquet.column.Encoding>(); for (Encoding encoding : encodings) { converted.add(getEncoding(encoding)); } // make converted unmodifiable, drop reference to modifiable copy converted = Collections.unmodifiableSet(converted); // atomically update the cache Set<org.apache.parquet.column.Encoding> cached = cachedEncodingSets.putIfAbsent(converted, converted); if (cached == null) { // cached == null signifies that converted was *not* in the cache previously // so we can return converted instead of throwing it away, it has now // been cached cached = converted; } return cached; }
private PageHeader newDataPageHeader( int uncompressedSize, int compressedSize, int valueCount, org.apache.parquet.column.Encoding rlEncoding, org.apache.parquet.column.Encoding dlEncoding, org.apache.parquet.column.Encoding valuesEncoding) { PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize); // TODO: pageHeader.crc = ...; pageHeader.setData_page_header(new DataPageHeader( valueCount, getEncoding(valuesEncoding), getEncoding(dlEncoding), getEncoding(rlEncoding))); return pageHeader; }
public List<PageEncodingStats> convertEncodingStats(EncodingStats stats) {
  // Null stats round-trip as null (the stats are optional).
  if (stats == null) {
    return null;
  }
  List<PageEncodingStats> converted = new ArrayList<PageEncodingStats>();
  // One entry per dictionary-page encoding, with its page count.
  for (org.apache.parquet.column.Encoding dictEncoding : stats.getDictionaryEncodings()) {
    converted.add(new PageEncodingStats(
        PageType.DICTIONARY_PAGE,
        getEncoding(dictEncoding),
        stats.getNumDictionaryPagesEncodedAs(dictEncoding)));
  }
  // All data-page entries use the same page type, chosen by the v2 flag.
  PageType dataPageType;
  if (stats.usesV2Pages()) {
    dataPageType = PageType.DATA_PAGE_V2;
  } else {
    dataPageType = PageType.DATA_PAGE;
  }
  for (org.apache.parquet.column.Encoding dataEncoding : stats.getDataEncodings()) {
    converted.add(new PageEncodingStats(
        dataPageType,
        getEncoding(dataEncoding),
        stats.getNumDataPagesEncodedAs(dataEncoding)));
  }
  return converted;
}
public List<PageEncodingStats> convertEncodingStats(EncodingStats stats) {
  // Preserve "no stats" as null rather than an empty list.
  if (stats == null) {
    return null;
  }
  final List<PageEncodingStats> result = new ArrayList<PageEncodingStats>();
  // Dictionary pages: record each encoding with its page count.
  for (org.apache.parquet.column.Encoding enc : stats.getDictionaryEncodings()) {
    int pageCount = stats.getNumDictionaryPagesEncodedAs(enc);
    result.add(new PageEncodingStats(PageType.DICTIONARY_PAGE, getEncoding(enc), pageCount));
  }
  // Data pages: the page type depends on whether v2 pages were written.
  final PageType pageType = stats.usesV2Pages() ? PageType.DATA_PAGE_V2 : PageType.DATA_PAGE;
  for (org.apache.parquet.column.Encoding enc : stats.getDataEncodings()) {
    int pageCount = stats.getNumDataPagesEncodedAs(enc);
    result.add(new PageEncodingStats(pageType, getEncoding(enc), pageCount));
  }
  return result;
}
private PageHeader newDataPageHeader( int uncompressedSize, int compressedSize, int valueCount, org.apache.parquet.column.statistics.Statistics statistics, org.apache.parquet.column.Encoding rlEncoding, org.apache.parquet.column.Encoding dlEncoding, org.apache.parquet.column.Encoding valuesEncoding) { PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize); // TODO: pageHeader.crc = ...; pageHeader.setData_page_header(new DataPageHeader( valueCount, getEncoding(valuesEncoding), getEncoding(dlEncoding), getEncoding(rlEncoding))); if (!statistics.isEmpty()) { pageHeader.getData_page_header().setStatistics(toParquetStatistics(statistics)); } return pageHeader; }
private PageHeader newDataPageV2Header( int uncompressedSize, int compressedSize, int valueCount, int nullCount, int rowCount, org.apache.parquet.column.Encoding dataEncoding, int rlByteLength, int dlByteLength) { // TODO: pageHeader.crc = ...; DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2( valueCount, nullCount, rowCount, getEncoding(dataEncoding), dlByteLength, rlByteLength); PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize); pageHeader.setData_page_header_v2(dataPageHeaderV2); return pageHeader; }
public void writeDictionaryPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  // Assemble the dictionary page header and serialize it to the stream.
  DictionaryPageHeader dictHeader =
      new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding));
  PageHeader header = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
  header.setDictionary_page_header(dictHeader);
  writePageHeader(header, to);
}
public void writeDictionaryPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  // Build the page header for a dictionary page and write it out.
  final PageHeader pageHeader =
      new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
  final Encoding formatEncoding = getEncoding(valuesEncoding);
  pageHeader.setDictionary_page_header(new DictionaryPageHeader(valueCount, formatEncoding));
  writePageHeader(pageHeader, to);
}
private DictionaryPage readDictionaryPageHelper(PageHeader pageHeader) throws IOException {
  // Decompress the page payload. NOTE(review): the meaning of the boolean
  // flag is defined by uncompressPage — presumably "is data page" or
  // similar; confirm at the callee.
  ByteBuffer data = uncompressPage(pageHeader, false);
  // Use Thrift getters consistently instead of mixing direct field access
  // (pageHeader.uncompressed_page_size, dictionary_page_header.encoding)
  // with getter calls; the getters return the same field values.
  return new DictionaryPage(
      BytesInput.from(data, 0, pageHeader.getUncompressed_page_size()),
      pageHeader.getDictionary_page_header().getNum_values(),
      parquetMetadataConverter.getEncoding(pageHeader.getDictionary_page_header().getEncoding()));
}
public EncodingStats convertEncodingStats(List<PageEncodingStats> stats) { if (stats == null) { return null; } EncodingStats.Builder builder = new EncodingStats.Builder(); for (PageEncodingStats stat : stats) { switch (stat.getPage_type()) { case DATA_PAGE_V2: builder.withV2Pages(); // falls through case DATA_PAGE: builder.addDataEncoding( getEncoding(stat.getEncoding()), stat.getCount()); break; case DICTIONARY_PAGE: builder.addDictEncoding( getEncoding(stat.getEncoding()), stat.getCount()); break; } } return builder.build(); }
public EncodingStats convertEncodingStats(List<PageEncodingStats> stats) { if (stats == null) { return null; } EncodingStats.Builder builder = new EncodingStats.Builder(); for (PageEncodingStats stat : stats) { switch (stat.getPage_type()) { case DATA_PAGE_V2: builder.withV2Pages(); // falls through case DATA_PAGE: builder.addDataEncoding( getEncoding(stat.getEncoding()), stat.getCount()); break; case DICTIONARY_PAGE: builder.addDictEncoding( getEncoding(stat.getEncoding()), stat.getCount()); break; } } return builder.build(); }
public static Dictionary readDictionary(FSDataInputStream in, ColumnDescriptor column, PageHeaderWithOffset pageHeader, BytesDecompressor decompressor) throws IOException {
  // Position the stream at the dictionary page and read its compressed bytes.
  in.seek(pageHeader.getOffset());
  final byte[] data = new byte[pageHeader.getPageHeader().getCompressed_page_size()];
  // Fix: a single read() may legally return fewer bytes than requested even
  // when more are available, which the previous check turned into a spurious
  // failure. readFully loops until the buffer is filled (or throws
  // EOFException, a subclass of IOException, on a true short stream).
  in.readFully(data);
  // Decompress and decode the dictionary, then let the encoding build the
  // in-memory Dictionary for this column.
  final DictionaryPage dictionaryPage = new DictionaryPage(
      decompressor.decompress(BytesInput.from(data),
          pageHeader.getPageHeader().getUncompressed_page_size()),
      pageHeader.getPageHeader().getDictionary_page_header().getNum_values(),
      CONVERTER.getEncoding(pageHeader.getPageHeader().getDictionary_page_header().getEncoding()));
  return dictionaryPage.getEncoding().initDictionary(column, dictionaryPage);
}
private PageHeader newDataPageV2Header( int uncompressedSize, int compressedSize, int valueCount, int nullCount, int rowCount, org.apache.parquet.column.statistics.Statistics<?> statistics, org.apache.parquet.column.Encoding dataEncoding, int rlByteLength, int dlByteLength) { // TODO: pageHeader.crc = ...; DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2( valueCount, nullCount, rowCount, getEncoding(dataEncoding), dlByteLength, rlByteLength); if (!statistics.isEmpty()) { dataPageHeaderV2.setStatistics( toParquetStatistics(statistics)); } PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize); pageHeader.setData_page_header_v2(dataPageHeaderV2); return pageHeader; }
private DictionaryPage readCompressedDictionary(
    PageHeader pageHeader, SeekableInputStream fin) throws IOException {
  DictionaryPageHeader dictHeader = pageHeader.getDictionary_page_header();
  // Read exactly the compressed payload; the page is returned still
  // compressed, with the uncompressed size recorded for later inflation.
  byte[] compressedBytes = new byte[pageHeader.getCompressed_page_size()];
  fin.readFully(compressedBytes);
  return new DictionaryPage(
      BytesInput.from(compressedBytes),
      pageHeader.getUncompressed_page_size(),
      dictHeader.getNum_values(),
      converter.getEncoding(dictHeader.getEncoding()));
}
decompressor.decompress(BytesInput.from(in, pageHeader.compressed_page_size), pageHeader.getUncompressed_page_size()), pageHeader.getDictionary_page_header().getNum_values(), parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding) ); } catch (Exception e) {
private DictionaryPage readCompressedDictionary(
    PageHeader pageHeader, SeekableInputStream fin) throws IOException {
  // Sizes come from the page header; the payload stays compressed here.
  final int uncompressedSize = pageHeader.getUncompressed_page_size();
  final int compressedSize = pageHeader.getCompressed_page_size();
  final byte[] buffer = new byte[compressedSize];
  // readFully blocks until the whole compressed payload has been read.
  fin.readFully(buffer);
  final DictionaryPageHeader header = pageHeader.getDictionary_page_header();
  return new DictionaryPage(
      BytesInput.from(buffer),
      uncompressedSize,
      header.getNum_values(),
      converter.getEncoding(header.getEncoding()));
}
Preconditions.checkState(currentPageCount > 0); final Encoding rlEncoding = METADATA_CONVERTER.getEncoding(pageHeader.data_page_header.repetition_level_encoding); final Encoding dlEncoding = METADATA_CONVERTER.getEncoding(pageHeader.data_page_header.definition_level_encoding);