/**
 * Exercises {@code isOnlyDictionaryEncodingPages} with encoding sets built from
 * {@code BIT_PACKED}, {@code RLE}, {@code PLAIN} and {@code PLAIN_DICTIONARY}.
 * <p>
 * Each of three "level" sets (labelled required / optional / repeated in the
 * assertion messages) is unioned with each of three "value" sets. Only the
 * unions whose value set is exactly {@code PLAIN_DICTIONARY} are expected to
 * be reported as dictionary-only.
 */
@Test
@SuppressWarnings("deprecation")
public void testDictionaryEncodingCasesV1()
{
    // Sets labelled required / optional / repeated in the assertion messages
    Set<Encoding> requiredLevels = ImmutableSet.of(BIT_PACKED);
    Set<Encoding> optionalLevels = ImmutableSet.of(BIT_PACKED, RLE);
    Set<Encoding> repeatedLevels = ImmutableSet.of(RLE);

    // Sets labelled notDictionary / mixedDictionary / dictionary in the assertion messages
    Set<Encoding> plainOnly = ImmutableSet.of(PLAIN);
    Set<Encoding> dictionaryWithPlain = ImmutableSet.of(PLAIN_DICTIONARY, PLAIN);
    Set<Encoding> dictionaryOnly = ImmutableSet.of(PLAIN_DICTIONARY);

    // PLAIN alone: never dictionary-only
    assertFalse(
            isOnlyDictionaryEncodingPages(union(requiredLevels, plainOnly)),
            "required notDictionary");
    assertFalse(
            isOnlyDictionaryEncodingPages(union(optionalLevels, plainOnly)),
            "optional notDictionary");
    assertFalse(
            isOnlyDictionaryEncodingPages(union(repeatedLevels, plainOnly)),
            "repeated notDictionary");

    // PLAIN_DICTIONARY together with PLAIN: still not dictionary-only
    assertFalse(
            isOnlyDictionaryEncodingPages(union(requiredLevels, dictionaryWithPlain)),
            "required mixedDictionary");
    assertFalse(
            isOnlyDictionaryEncodingPages(union(optionalLevels, dictionaryWithPlain)),
            "optional mixedDictionary");
    assertFalse(
            isOnlyDictionaryEncodingPages(union(repeatedLevels, dictionaryWithPlain)),
            "repeated mixedDictionary");

    // PLAIN_DICTIONARY alone: dictionary-only
    assertTrue(
            isOnlyDictionaryEncodingPages(union(requiredLevels, dictionaryOnly)),
            "required dictionary");
    assertTrue(
            isOnlyDictionaryEncodingPages(union(optionalLevels, dictionaryOnly)),
            "optional dictionary");
    assertTrue(
            isOnlyDictionaryEncodingPages(union(repeatedLevels, dictionaryOnly)),
            "repeated dictionary");
}
/**
 * Collects the dictionary page of a column chunk in the given block that is
 * eligible for dictionary-based predicate evaluation.
 * <p>
 * A column chunk is eligible when its path resolves to a descriptor in
 * {@code descriptorsByPath}, {@code isOnlyDictionaryEncodingPages} accepts its
 * encodings, and {@code isColumnPredicate} accepts its descriptor for
 * {@code parquetTupleDomain}. For an eligible chunk the whole chunk
 * ({@code getTotalSize()} bytes starting at {@code getStartingPos()}) is read
 * from {@code dataSource} and handed to {@code readDictionaryPage}.
 * <p>
 * NOTE(review): iteration stops after the first eligible chunk, so the
 * returned map holds at most one entry. Confirm this is intentional (for
 * example to bound I/O) rather than an oversight.
 *
 * @param blockMetadata metadata of the block whose column chunks are scanned
 * @param dataSource source the column chunk bytes are read from
 * @param descriptorsByPath column descriptors keyed by column path
 * @param parquetTupleDomain domain passed to {@code isColumnPredicate}
 * @return an immutable map from column descriptor to its dictionary descriptor;
 *         empty when no chunk is eligible
 */
private static Map<ColumnDescriptor, DictionaryDescriptor> getDictionaries(BlockMetaData blockMetadata, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain)
{
    ImmutableMap.Builder<ColumnDescriptor, DictionaryDescriptor> result = ImmutableMap.builder();

    for (ColumnChunkMetaData chunk : blockMetadata.getColumns()) {
        RichColumnDescriptor column = descriptorsByPath.get(Arrays.asList(chunk.getPath().toArray()));
        if (column == null) {
            // Chunk belongs to a column that has no known descriptor
            continue;
        }
        if (!isOnlyDictionaryEncodingPages(chunk.getEncodings()) || !isColumnPredicate(column, parquetTupleDomain)) {
            continue;
        }

        // Read the entire column chunk; the dictionary page is extracted from these bytes.
        // presumably toIntExact is Math.toIntExact and fails on chunks larger than an int — verify the static import
        byte[] chunkBytes = new byte[toIntExact(chunk.getTotalSize())];
        dataSource.readFully(chunk.getStartingPos(), chunkBytes);

        Optional<DictionaryPage> page = readDictionaryPage(chunkBytes, chunk.getCodec());
        result.put(column, new DictionaryDescriptor(column, page));

        // Only the first eligible chunk is processed (see NOTE in the Javadoc)
        break;
    }

    return result.build();
}
/**
 * Exercises {@code isOnlyDictionaryEncodingPages} with encoding sets built from
 * {@code BIT_PACKED}, {@code RLE}, {@code PLAIN} and {@code PLAIN_DICTIONARY}.
 * <p>
 * Each of three "level" sets (labelled required / optional / repeated in the
 * assertion messages) is unioned with each of three "value" sets. Only the
 * unions whose value set is exactly {@code PLAIN_DICTIONARY} are expected to
 * be reported as dictionary-only.
 */
@Test
@SuppressWarnings("deprecation")
public void testDictionaryEncodingCasesV1()
{
    // Sets labelled required / optional / repeated in the assertion messages
    Set<Encoding> requiredLevels = ImmutableSet.of(BIT_PACKED);
    Set<Encoding> optionalLevels = ImmutableSet.of(BIT_PACKED, RLE);
    Set<Encoding> repeatedLevels = ImmutableSet.of(RLE);

    // Sets labelled notDictionary / mixedDictionary / dictionary in the assertion messages
    Set<Encoding> plainOnly = ImmutableSet.of(PLAIN);
    Set<Encoding> dictionaryWithPlain = ImmutableSet.of(PLAIN_DICTIONARY, PLAIN);
    Set<Encoding> dictionaryOnly = ImmutableSet.of(PLAIN_DICTIONARY);

    // PLAIN alone: never dictionary-only
    assertFalse(
            isOnlyDictionaryEncodingPages(union(requiredLevels, plainOnly)),
            "required notDictionary");
    assertFalse(
            isOnlyDictionaryEncodingPages(union(optionalLevels, plainOnly)),
            "optional notDictionary");
    assertFalse(
            isOnlyDictionaryEncodingPages(union(repeatedLevels, plainOnly)),
            "repeated notDictionary");

    // PLAIN_DICTIONARY together with PLAIN: still not dictionary-only
    assertFalse(
            isOnlyDictionaryEncodingPages(union(requiredLevels, dictionaryWithPlain)),
            "required mixedDictionary");
    assertFalse(
            isOnlyDictionaryEncodingPages(union(optionalLevels, dictionaryWithPlain)),
            "optional mixedDictionary");
    assertFalse(
            isOnlyDictionaryEncodingPages(union(repeatedLevels, dictionaryWithPlain)),
            "repeated mixedDictionary");

    // PLAIN_DICTIONARY alone: dictionary-only
    assertTrue(
            isOnlyDictionaryEncodingPages(union(requiredLevels, dictionaryOnly)),
            "required dictionary");
    assertTrue(
            isOnlyDictionaryEncodingPages(union(optionalLevels, dictionaryOnly)),
            "optional dictionary");
    assertTrue(
            isOnlyDictionaryEncodingPages(union(repeatedLevels, dictionaryOnly)),
            "repeated dictionary");
}