private static Map<ColumnDescriptor, DictionaryDescriptor> getDictionaries(BlockMetaData blockMetadata, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain) { ImmutableMap.Builder<ColumnDescriptor, DictionaryDescriptor> dictionaries = ImmutableMap.builder(); for (ColumnChunkMetaData columnMetaData : blockMetadata.getColumns()) { RichColumnDescriptor descriptor = descriptorsByPath.get(Arrays.asList(columnMetaData.getPath().toArray())); if (descriptor != null) { if (isOnlyDictionaryEncodingPages(columnMetaData.getEncodings()) && isColumnPredicate(descriptor, parquetTupleDomain)) { int totalSize = toIntExact(columnMetaData.getTotalSize()); byte[] buffer = new byte[totalSize]; dataSource.readFully(columnMetaData.getStartingPos(), buffer); Optional<DictionaryPage> dictionaryPage = readDictionaryPage(buffer, columnMetaData.getCodec()); dictionaries.put(descriptor, new DictionaryDescriptor(descriptor, dictionaryPage)); break; } } } return dictionaries.build(); }
private ColumnChunk readPrimitive(PrimitiveField field) throws IOException { ColumnDescriptor columnDescriptor = field.getDescriptor(); PrimitiveColumnReader columnReader = columnReaders[field.getId()]; if (columnReader.getPageReader() == null) { validateParquet(currentBlockMetadata.getRowCount() > 0, "Row group has 0 rows"); ColumnChunkMetaData metadata = getColumnChunkMetaData(columnDescriptor); long startingPosition = metadata.getStartingPos(); int totalSize = toIntExact(metadata.getTotalSize()); byte[] buffer = allocateBlock(totalSize); dataSource.readFully(startingPosition, buffer); ColumnChunkDescriptor descriptor = new ColumnChunkDescriptor(columnDescriptor, metadata, totalSize); ParquetColumnChunk columnChunk = new ParquetColumnChunk(descriptor, buffer, 0); columnReader.setPageReader(columnChunk.readAllPages()); } return columnReader.readPrimitive(field); }
/** * * @return the starting pos of first column */ public long getStartingPos() { return getColumns().get(0).getStartingPos(); } @Override
int totalSize = Ints.checkedCast(columnChunkMetaData.getTotalSize()); byte[] buffer = new byte[totalSize]; dataSource.readFully(columnChunkMetaData.getStartingPos(), buffer); DictionaryPage dictionaryPage = readDictionaryPage(buffer, codecFactory, columnChunkMetaData.getCodec()); dictionaries.put(ordinal, new ParquetDictionaryDescriptor(columnDescriptor, dictionaryPage));
public Block readBlock(ColumnDescriptor columnDescriptor, Type type) throws IOException { ParquetColumnReader columnReader = columnReadersMap.get(columnDescriptor); if (columnReader.getPageReader() == null) { validateParquet(currentBlockMetadata.getRowCount() > 0, "Row group having 0 rows"); ColumnChunkMetaData metadata = getColumnChunkMetaData(columnDescriptor); long startingPosition = metadata.getStartingPos(); int totalSize = Ints.checkedCast(metadata.getTotalSize()); byte[] buffer = new byte[totalSize]; dataSource.readFully(startingPosition, buffer); ParquetColumnChunkDescriptor descriptor = new ParquetColumnChunkDescriptor(columnDescriptor, metadata, startingPosition, totalSize); ParquetColumnChunk columnChunk = new ParquetColumnChunk(descriptor, buffer, 0, codecFactory); columnReader.setPageReader(columnChunk.readAllPages()); } return columnReader.readBlock(type); }
ColumnDescriptor columnDescriptor = paths.get(pathKey); if (columnDescriptor != null) { long startingPos = mc.getStartingPos();