private static ColumnarMap toColumnarMap(DictionaryBlock dictionaryBlock) { ColumnarMap columnarMap = toColumnarMap(dictionaryBlock.getDictionary()); // build new offsets int[] offsets = new int[dictionaryBlock.getPositionCount() + 1]; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); offsets[position + 1] = offsets[position] + columnarMap.getEntryCount(dictionaryId); } // reindex dictionary int[] dictionaryIds = new int[offsets[dictionaryBlock.getPositionCount()]]; int nextDictionaryIndex = 0; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); int entryCount = columnarMap.getEntryCount(dictionaryId); // adjust to the element block start offset int startOffset = columnarMap.getOffset(dictionaryId) - columnarMap.getOffset(0); for (int entryIndex = 0; entryIndex < entryCount; entryIndex++) { dictionaryIds[nextDictionaryIndex] = startOffset + entryIndex; nextDictionaryIndex++; } } return new ColumnarMap( dictionaryBlock, 0, offsets, new DictionaryBlock(dictionaryIds.length, columnarMap.getKeysBlock(), dictionaryIds), new DictionaryBlock(dictionaryIds.length, columnarMap.getValuesBlock(), dictionaryIds)); }
/**
 * Returns a dictionary block containing the selected positions, sharing this block's
 * dictionary and dictionary source id.
 * <p>
 * The result is flagged compact only when this block is compact and the selected
 * positions reference every dictionary entry at least once.
 *
 * @param positions array holding the positions to select
 * @param offset index of the first position to read from {@code positions}
 * @param length number of positions to read
 * @return a new block with {@code length} positions
 */
@Override
public Block getPositions(int[] positions, int offset, int length)
{
    checkArrayRange(positions, offset, length);

    int dictionarySize = dictionary.getPositionCount();

    // Coverage is only tracked when the result still has a chance of being compact;
    // fewer selected positions than dictionary entries can never reference all of them.
    boolean[] referenced = null;
    if (isCompact() && length >= dictionarySize) {
        referenced = new boolean[dictionarySize];
    }

    int[] selectedIds = new int[length];
    for (int index = 0; index < length; index++) {
        int id = getId(positions[offset + index]);
        selectedIds[index] = id;
        if (referenced != null) {
            referenced[id] = true;
        }
    }

    boolean resultIsCompact = referenced != null;
    if (resultIsCompact) {
        for (boolean hit : referenced) {
            if (!hit) {
                resultIsCompact = false;
                break;
            }
        }
    }

    return new DictionaryBlock(selectedIds.length, getDictionary(), selectedIds, resultIsCompact, getDictionarySourceId());
}
/**
 * Checks that compacting a block with unreferenced dictionary keys yields a new, compact
 * dictionary with a fresh source id, and that compacting an already compact block keeps
 * the source id.
 */
@Test
public void testCompact()
{
    Slice[] values = createExpectedValues(5);
    DictionaryBlock sparseBlock = createDictionaryBlockWithUnreferencedKeys(values, 10);
    assertEquals(sparseBlock.isCompact(), false);

    DictionaryBlock compacted = sparseBlock.compact();

    // Compaction is expected to produce a different dictionary source id
    assertNotEquals(sparseBlock.getDictionarySourceId(), compacted.getDictionarySourceId());

    // Only the referenced keys remain in the compacted dictionary
    Block compactedDictionary = compacted.getDictionary();
    assertEquals(compactedDictionary.getPositionCount(), (values.length / 2) + 1);
    Slice[] referencedValues = new Slice[] {values[0], values[1], values[3]};
    assertBlock(compactedDictionary, TestDictionaryBlock::createBlockBuilder, referencedValues);
    assertDictionaryIds(compacted, 0, 1, 1, 2, 2, 0, 1, 1, 2, 2);
    assertEquals(compacted.isCompact(), true);

    // Compacting again must keep the same source id
    DictionaryBlock compactedAgain = compacted.compact();
    assertEquals(compactedAgain.getDictionarySourceId(), compacted.getDictionarySourceId());
}
/**
 * Copies the ids of the requested region into a new array and returns the compacted
 * dictionary block built over them.
 *
 * @param position first position of the region, relative to this block
 * @param length number of positions in the region
 * @return the result of {@code compact()} on a block holding only the region's ids
 */
@Override
public Block copyRegion(int position, int length)
{
    checkValidRegion(positionCount, position, length);

    // ids of this block start at idsOffset within the backing array
    int regionStart = idsOffset + position;
    int[] regionIds = Arrays.copyOfRange(ids, regionStart, regionStart + length);
    return new DictionaryBlock(dictionary, regionIds).compact();
}
@Override public long getRegionSizeInBytes(int positionOffset, int length) { if (positionOffset == 0 && length == getPositionCount()) { // Calculation of getRegionSizeInBytes is expensive in this class. // On the other hand, getSizeInBytes result is cached. return getSizeInBytes(); } boolean[] used = new boolean[dictionary.getPositionCount()]; for (int i = positionOffset; i < positionOffset + length; i++) { used[getId(i)] = true; } return dictionary.getPositionsSizeInBytes(used) + Integer.BYTES * (long) length; }
/**
 * Translates a selection expressed over dictionary positions into a selection over the
 * positions of the dictionary block.
 * <p>
 * When no position or every position is selected, a range starting at 0 is returned;
 * otherwise the selected positions are listed explicitly in ascending order.
 *
 * @param dictionaryBlock block whose ids index into {@code selectedDictionaryPositions}
 * @param selectedDictionaryPositions flag per dictionary position, {@code true} when selected
 * @return the selected positions of {@code dictionaryBlock}
 */
private static SelectedPositions selectDictionaryPositions(DictionaryBlock dictionaryBlock, boolean[] selectedDictionaryPositions)
{
    int positionCount = dictionaryBlock.getPositionCount();

    // First pass: count the matches so the result can be sized exactly
    int matches = 0;
    for (int position = 0; position < positionCount; position++) {
        boolean selected = selectedDictionaryPositions[dictionaryBlock.getId(position)];
        if (selected) {
            matches++;
        }
    }

    boolean nothingSelected = matches == 0;
    boolean everythingSelected = matches == positionCount;
    if (nothingSelected || everythingSelected) {
        return SelectedPositions.positionsRange(0, matches);
    }

    // Second pass: record the matching positions
    int[] selectedPositions = new int[matches];
    int writeIndex = 0;
    for (int position = 0; position < positionCount; position++) {
        boolean selected = selectedDictionaryPositions[dictionaryBlock.getId(position)];
        if (selected) {
            selectedPositions[writeIndex++] = position;
        }
    }
    return SelectedPositions.positionsList(selectedPositions, 0, matches);
}
/**
 * Returns a view over a region of this block that shares the same ids array, dictionary
 * and dictionary source id.
 *
 * @param positionOffset first position of the region, relative to this block
 * @param length number of positions in the region
 * @return a dictionary block over the region; the boolean constructor argument is passed as {@code false}
 */
@Override
public Block getRegion(int positionOffset, int length)
{
    checkValidRegion(positionCount, positionOffset, length);

    // The region starts at this block's own offset plus the requested offset
    int regionIdsOffset = idsOffset + positionOffset;
    return new DictionaryBlock(regionIdsOffset, length, dictionary, ids, false, dictionarySourceId);
}
@Override public void writeBlock(BlockEncodingSerde blockEncodingSerde, SliceOutput sliceOutput, Block block) { // The down casts here are safe because it is the block itself the provides this encoding implementation. DictionaryBlock dictionaryBlock = (DictionaryBlock) block; dictionaryBlock = dictionaryBlock.compact(); // positionCount int positionCount = dictionaryBlock.getPositionCount(); sliceOutput.appendInt(positionCount); // dictionary Block dictionary = dictionaryBlock.getDictionary(); blockEncodingSerde.writeBlock(sliceOutput, dictionary); // ids sliceOutput.writeBytes(dictionaryBlock.getIds()); // instance id sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getMostSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getLeastSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getSequenceId()); }
@Test public void testCompactAllKeysReferenced() { Slice[] expectedValues = createExpectedValues(5); DictionaryBlock dictionaryBlock = createDictionaryBlock(expectedValues, 10); DictionaryBlock compactBlock = dictionaryBlock.compact(); // When there is nothing to compact, we return the same block assertEquals(compactBlock.getDictionary(), dictionaryBlock.getDictionary()); assertEquals(compactBlock.getPositionCount(), dictionaryBlock.getPositionCount()); for (int position = 0; position < compactBlock.getPositionCount(); position++) { assertEquals(compactBlock.getId(position), dictionaryBlock.getId(position)); } assertEquals(compactBlock.isCompact(), true); }
/**
 * Checks that copying a few positions from a block built over a single expected value
 * keeps the position count of the selection and a dictionary equal to the expected values.
 */
@Test
public void testCopyPositionsNoCompaction()
{
    Slice[] values = createExpectedValues(1);
    DictionaryBlock source = createDictionaryBlock(values, 100);

    int[] selected = new int[] {0, 2, 4, 5};
    Block copy = source.copyPositions(selected, 0, selected.length);
    DictionaryBlock copiedDictionaryBlock = (DictionaryBlock) copy;

    assertEquals(copiedDictionaryBlock.getPositionCount(), selected.length);
    assertBlock(copiedDictionaryBlock.getDictionary(), TestDictionaryBlock::createBlockBuilder, values);
}
// NOTE(review): the line below is a non-contiguous excerpt of compact(); braces and the
// statements between the visible pieces are missing, so it does not compile as shown.
// What the visible pieces establish:
//   - when isCompact() is true the method returns this block unchanged;
//   - a dictionary index whose remapIndex entry is -1 is added to dictionaryPositionsToCopy;
//   - an id whose remapIndex entry is still -1 when new ids are computed raises
//     IllegalStateException("reference to a non-existent key");
//   - the result is a new DictionaryBlock over compactDictionary and newIds, built with a
//     trailing boolean argument of true (presumably the "is compact" flag; confirm against
//     the constructor).
public DictionaryBlock compact() if (isCompact()) { return this; int dictionaryIndex = getId(i); if (remapIndex[dictionaryIndex] == -1) { dictionaryPositionsToCopy.add(dictionaryIndex); int newId = remapIndex[getId(i)]; if (newId == -1) { throw new IllegalStateException("reference to a non-existent key"); return new DictionaryBlock(positionCount, compactDictionary, newIds, true);
// Exercises how getPositions affects the compact flag, re-compacting whenever it is lost.
// Start from a compacted block whose ids 0..5 reference six of ten dictionary values.
DictionaryBlock block = new DictionaryBlock(createSlicesBlock(createExpectedValues(10)), new int[] {0, 1, 2, 3, 4, 5}).compact();

// The first 7 positions (3, 3, 4, 5, 2, 0, 1) still reference all six positions: expected compact
block = (DictionaryBlock) block.getPositions(new int[] {3, 3, 4, 5, 2, 0, 1, 1}, 0, 7);
assertTrue(block.isCompact());

// The first 12 entries select every one of the 7 positions at least once: expected compact
block = (DictionaryBlock) block.getPositions(new int[] {0, 1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1}, 0, 12);
assertTrue(block.isCompact());

// Only positions 0 and 2 are selected: expected non-compact, so compact again
block = (DictionaryBlock) block.getPositions(new int[] {0, 2, 0, 2, 0}, 0, 5);
assertFalse(block.isCompact());
block = block.compact();

// Positions 0 and 1 are both selected: expected compact
block = (DictionaryBlock) block.getPositions(new int[] {0, 1, 1, 1}, 0, 4);
assertTrue(block.isCompact());

// Only position 1 is selected: expected non-compact, so compact again
block = (DictionaryBlock) block.getPositions(new int[] {1, 1, 1, 1}, 0, 4);
assertFalse(block.isCompact());
block = block.compact();

// Selecting the single position 0: expected compact
block = (DictionaryBlock) block.getPositions(new int[] {0}, 0, 1);
assertTrue(block.isCompact());

// An empty selection is expected to be non-compact, so compact again
block = (DictionaryBlock) block.getPositions(new int[] {}, 0, 0);
assertFalse(block.isCompact());
block = block.compact();
/**
 * Checks that copying positions given out of order produces a dictionary with only the
 * two referenced values and ids rewritten to index into it.
 */
@Test
public void testCopyPositionsWithCompactionsAndReorder()
        throws Exception
{
    Slice[] values = createExpectedValues(10);
    DictionaryBlock source = createDictionaryBlock(values, 100);

    List<Integer> selected = Ints.asList(50, 55, 40, 45, 60);
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(selected);

    // The copied dictionary is expected to hold exactly the two referenced values
    Block copiedDictionary = copy.getDictionary();
    assertEquals(copiedDictionary.getPositionCount(), 2);
    assertEquals(copy.getPositionCount(), selected.size());

    Slice[] referencedValues = new Slice[] { values[0], values[5] };
    assertBlock(copiedDictionary, referencedValues);
    assertEquals(copy.getIds(), wrappedIntArray(0, 1, 0, 1, 0));
}
/**
 * Rewrites the ids of a dictionary block through a remapping table.
 *
 * @param positionCount number of positions to translate, starting at position 0
 * @param dictionaryBlock block supplying the current id of each position
 * @param remapIndex table mapping a current id to its new id, with -1 marking an unmapped id
 * @return the new id for each of the first {@code positionCount} positions
 * @throws IllegalStateException if a position references an id mapped to -1
 */
private static int[] getNewIds(int positionCount, DictionaryBlock dictionaryBlock, int[] remapIndex)
{
    int[] remappedIds = new int[positionCount];
    int position = 0;
    while (position < positionCount) {
        int currentId = dictionaryBlock.getId(position);
        int remappedId = remapIndex[currentId];
        if (remappedId == -1) {
            throw new IllegalStateException("reference to a non-existent key");
        }
        remappedIds[position] = remappedId;
        position++;
    }
    return remappedIds;
}
// Wrap the expected values in a dictionary block whose ids 0..5 reference the first six values
// (expectedValues is declared outside this excerpt).
Block dictionaryBlock = new DictionaryBlock(createSlicesBlock(expectedValues), new int[] {0, 1, 2, 3, 4, 5});
assertBlock(dictionaryBlock, TestDictionaryBlock::createBlockBuilder, new Slice[] {expectedValues[0], expectedValues[1], expectedValues[2], expectedValues[3], expectedValues[4], expectedValues[5]});

// Capture the dictionary source id once, then check that repeated reads return an equal id.
// NOTE(review): the five assertions below are identical; presumably the operations that
// originally ran between them are not part of this excerpt. Confirm against the full test.
DictionaryId dictionaryId = ((DictionaryBlock) dictionaryBlock).getDictionarySourceId();
assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId);
assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId);
assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId);
assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId);
assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId);
/**
 * Returns a block whose dictionary is the result of {@code dictionary.getLoadedBlock()}.
 *
 * @return this block when the loaded dictionary is the same instance as the current one;
 *         otherwise a new dictionary block over the loaded dictionary that shares the ids
 *         array and carries a freshly generated dictionary id
 */
@Override
public Block getLoadedBlock()
{
    Block loaded = dictionary.getLoadedBlock();

    if (loaded != dictionary) {
        // The dictionary instance changed, so the wrapper is rebuilt around it with a new id
        return new DictionaryBlock(idsOffset, getPositionCount(), loaded, ids, false, randomDictionaryId());
    }
    return this;
}
/**
 * Creates the work item for adding a page whose block at {@code channels[0]} is a
 * {@link DictionaryBlock}.
 * <p>
 * The constructor records the page, extracts the dictionary block from channel
 * {@code channels[0]}, passes its dictionary to {@code updateDictionaryLookBack}, and
 * stores the page returned by {@code createPageWithExtractedDictionary}.
 *
 * @param page page to process; must not be null and must satisfy {@code canProcessDictionary}
 * @throws NullPointerException if {@code page} is null
 */
public AddDictionaryPageWork(Page page)
{
    // Reject null before the page is handed to canProcessDictionary, so a null argument
    // fails with the explicit "page is null" message instead of inside that call.
    this.page = requireNonNull(page, "page is null");
    verify(canProcessDictionary(page), "invalid call to addDictionaryPage");
    this.dictionaryBlock = (DictionaryBlock) page.getBlock(channels[0]);
    updateDictionaryLookBack(dictionaryBlock.getDictionary());
    this.dictionaryPage = createPageWithExtractedDictionary(page);
}
/**
 * Maps a block's dictionary source id to a dictionary id, generating a new one with
 * {@code randomDictionaryId()} the first time a given source id is seen.
 *
 * @param block block whose dictionary source id is used as the lookup key
 * @return the dictionary id associated with the block's source id in {@code dictionarySourceIds}
 */
@Override
public DictionaryId apply(DictionaryBlock block)
{
    DictionaryId sourceId = block.getDictionarySourceId();
    return dictionarySourceIds.computeIfAbsent(sourceId, unusedKey -> randomDictionaryId());
}
/**
 * Checks that the size of a dictionary block equals the size of its dictionary plus
 * {@code SIZE_OF_INT} bytes for each of its positions.
 */
@Test
public void testSizeInBytes()
{
    int positionCount = 100;
    Slice[] values = createExpectedValues(10);
    DictionaryBlock block = createDictionaryBlock(values, positionCount);

    long actualSize = block.getSizeInBytes();
    long expectedSize = block.getDictionary().getSizeInBytes() + (positionCount * SIZE_OF_INT);
    assertEquals(actualSize, expectedSize);
}
@Test public void testDictionaryBlockGetRegion() throws Exception { Slice[] expectedValues = createExpectedValues(3); int[] ids = new int[] { 0, 2, 1, 0, 0, 0, 1, 1, 1, 0, 1, 2 }; boolean[] isNull = new boolean[ids.length]; isNull[2] = true; LazyBlockLoader<LazySliceArrayBlock> loader = new TestDictionaryLazySliceArrayBlockLoader(expectedValues, ids, isNull); LazySliceArrayBlock block = new LazySliceArrayBlock(ids.length, loader); Block region = block.getRegion(0, 3); assertFalse(region.isNull(0)); assertFalse(region.isNull(1)); assertTrue(region.isNull(2)); assertTrue(region instanceof DictionaryBlock); DictionaryBlock dictionaryBlock = (DictionaryBlock) region; assertEquals(((SliceArrayBlock) dictionaryBlock.getDictionary()).getValues(), new Slice[] { expectedValues[0], expectedValues[2], null }); // The values in the dictionary are rearranged during compaction in the order in which they are referenced, // with a null appended to the end of the list if applicable assertEquals(dictionaryBlock.getIds(), Slices.wrappedIntArray(0, 1, 2)); }