private static ColumnarArray toColumnarArray(DictionaryBlock dictionaryBlock) { ColumnarArray columnarArray = toColumnarArray(dictionaryBlock.getDictionary()); // build new offsets int[] offsets = new int[dictionaryBlock.getPositionCount() + 1]; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); offsets[position + 1] = offsets[position] + columnarArray.getLength(dictionaryId); } // reindex dictionary int[] dictionaryIds = new int[offsets[dictionaryBlock.getPositionCount()]]; int nextDictionaryIndex = 0; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); int length = columnarArray.getLength(dictionaryId); // adjust to the element block start offset int startOffset = columnarArray.getOffset(dictionaryId) - columnarArray.getOffset(0); for (int entryIndex = 0; entryIndex < length; entryIndex++) { dictionaryIds[nextDictionaryIndex] = startOffset + entryIndex; nextDictionaryIndex++; } } return new ColumnarArray( dictionaryBlock, 0, offsets, new DictionaryBlock(dictionaryIds.length, columnarArray.getElementsBlock(), dictionaryIds)); }
/**
 * Returns a dictionary block over the same dictionary containing the ids found at the
 * requested positions.
 * <p>
 * The result is flagged compact only when this block is compact, at least as many
 * positions as dictionary entries were requested, and every dictionary entry is
 * referenced by the selection.
 *
 * @param positions array holding the positions to select
 * @param offset index of the first entry of {@code positions} to use
 * @param length number of entries of {@code positions} to use
 */
@Override
public Block getPositions(int[] positions, int offset, int length)
{
    checkArrayRange(positions, offset, length);

    int[] selectedIds = new int[length];
    int dictionarySize = dictionary.getPositionCount();

    // Fewer selected positions than dictionary entries can never cover the dictionary.
    boolean compact = isCompact() && length >= dictionarySize;
    boolean[] referenced = compact ? new boolean[dictionarySize] : null;

    for (int i = 0; i < length; i++) {
        int id = getId(positions[offset + i]);
        selectedIds[i] = id;
        if (compact) {
            referenced[id] = true;
        }
    }

    if (compact) {
        // A single unreferenced dictionary entry makes the result non-compact.
        for (boolean hit : referenced) {
            if (!hit) {
                compact = false;
                break;
            }
        }
    }

    return new DictionaryBlock(selectedIds.length, getDictionary(), selectedIds, compact, getDictionarySourceId());
}
@Test public void testCompactAllKeysReferenced() { Slice[] expectedValues = createExpectedValues(5); DictionaryBlock dictionaryBlock = createDictionaryBlock(expectedValues, 10); DictionaryBlock compactBlock = dictionaryBlock.compact(); // When there is nothing to compact, we return the same block assertEquals(compactBlock.getDictionary(), dictionaryBlock.getDictionary()); assertEquals(compactBlock.getPositionCount(), dictionaryBlock.getPositionCount()); for (int position = 0; position < compactBlock.getPositionCount(); position++) { assertEquals(compactBlock.getId(position), dictionaryBlock.getId(position)); } assertEquals(compactBlock.isCompact(), true); }
/**
 * Copies the ids of the requested region into a fresh array, wraps them in a new
 * dictionary block over the same dictionary, and returns the compacted result.
 *
 * @param position first position of the region
 * @param length number of positions in the region
 */
@Override
public Block copyRegion(int position, int length)
{
    checkValidRegion(positionCount, position, length);

    // Translate the block-relative region into indexes of the backing ids array.
    int from = idsOffset + position;
    int[] regionIds = Arrays.copyOfRange(ids, from, from + length);

    return new DictionaryBlock(dictionary, regionIds).compact();
}
/**
 * Selects the positions of a dictionary block whose dictionary entry is marked selected.
 *
 * @param dictionaryBlock block whose positions are filtered
 * @param selectedDictionaryPositions flag per dictionary entry, indexed by dictionary id
 * @return a range starting at 0 when none or all positions match, otherwise an
 *         explicit list of the matching positions
 */
private static SelectedPositions selectDictionaryPositions(DictionaryBlock dictionaryBlock, boolean[] selectedDictionaryPositions)
{
    int positionCount = dictionaryBlock.getPositionCount();

    // First pass: count matches so the output array can be sized exactly.
    int matches = 0;
    for (int position = 0; position < positionCount; position++) {
        if (selectedDictionaryPositions[dictionaryBlock.getId(position)]) {
            matches++;
        }
    }

    boolean noneSelected = matches == 0;
    boolean allSelected = matches == positionCount;
    if (noneSelected || allSelected) {
        return SelectedPositions.positionsRange(0, matches);
    }

    // Second pass: record the matching positions in ascending order.
    int[] matching = new int[matches];
    int next = 0;
    for (int position = 0; position < positionCount; position++) {
        if (selectedDictionaryPositions[dictionaryBlock.getId(position)]) {
            matching[next++] = position;
        }
    }
    return SelectedPositions.positionsList(matching, 0, matches);
}
@Override public long getRegionSizeInBytes(int positionOffset, int length) { if (positionOffset == 0 && length == getPositionCount()) { // Calculation of getRegionSizeInBytes is expensive in this class. // On the other hand, getSizeInBytes result is cached. return getSizeInBytes(); } boolean[] used = new boolean[dictionary.getPositionCount()]; for (int i = positionOffset; i < positionOffset + length; i++) { used[getId(i)] = true; } return dictionary.getPositionsSizeInBytes(used) + Integer.BYTES * (long) length; }
/**
 * Returns a view of the requested region that shares this block's ids array,
 * dictionary and dictionary source id. The view is created with the compact flag
 * set to {@code false}.
 *
 * @param positionOffset first position of the region
 * @param length number of positions in the region
 */
@Override
public Block getRegion(int positionOffset, int length)
{
    checkValidRegion(positionCount, positionOffset, length);

    // The region starts this far into the shared ids array.
    int regionIdsOffset = idsOffset + positionOffset;
    return new DictionaryBlock(regionIdsOffset, length, dictionary, ids, false, dictionarySourceId);
}
/**
 * Copying a handful of positions out of a block built from a single expected value
 * must yield a block with that many positions whose dictionary still matches the
 * expected values.
 */
@Test
public void testCopyPositionsNoCompaction()
{
    Slice[] values = createExpectedValues(1);
    DictionaryBlock source = createDictionaryBlock(values, 100);

    int[] selected = {0, 2, 4, 5};
    Block copy = source.copyPositions(selected, 0, selected.length);
    DictionaryBlock copiedDictionaryBlock = (DictionaryBlock) copy;

    assertEquals(copiedDictionaryBlock.getPositionCount(), selected.length);
    assertBlock(copiedDictionaryBlock.getDictionary(), TestDictionaryBlock::createBlockBuilder, values);
}
@Override public void writeBlock(BlockEncodingSerde blockEncodingSerde, SliceOutput sliceOutput, Block block) { // The down casts here are safe because it is the block itself the provides this encoding implementation. DictionaryBlock dictionaryBlock = (DictionaryBlock) block; dictionaryBlock = dictionaryBlock.compact(); // positionCount int positionCount = dictionaryBlock.getPositionCount(); sliceOutput.appendInt(positionCount); // dictionary Block dictionary = dictionaryBlock.getDictionary(); blockEncodingSerde.writeBlock(sliceOutput, dictionary); // ids sliceOutput.writeBytes(dictionaryBlock.getIds()); // instance id sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getMostSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getLeastSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getSequenceId()); }
/**
 * Compacting a block with unreferenced dictionary keys must drop those keys, remap
 * the ids, assign a different dictionary source id, and mark the result compact.
 * Compacting the already-compact result must keep its dictionary source id.
 */
@Test
public void testCompact()
{
    Slice[] values = createExpectedValues(5);
    DictionaryBlock sparse = createDictionaryBlockWithUnreferencedKeys(values, 10);
    assertEquals(sparse.isCompact(), false);

    DictionaryBlock compacted = sparse.compact();
    assertNotEquals(sparse.getDictionarySourceId(), compacted.getDictionarySourceId());

    // Only the referenced keys (0, 1 and 3) are expected to survive compaction.
    assertEquals(compacted.getDictionary().getPositionCount(), (values.length / 2) + 1);
    Slice[] survivingValues = new Slice[] {values[0], values[1], values[3]};
    assertBlock(compacted.getDictionary(), TestDictionaryBlock::createBlockBuilder, survivingValues);
    assertDictionaryIds(compacted, 0, 1, 1, 2, 2, 0, 1, 1, 2, 2);
    assertEquals(compacted.isCompact(), true);

    // A second compaction keeps the same dictionary source id.
    DictionaryBlock compactedAgain = compacted.compact();
    assertEquals(compactedAgain.getDictionarySourceId(), compacted.getDictionarySourceId());
}
/**
 * Translates the ids of the first {@code positionCount} positions of a dictionary
 * block through a remapping table.
 *
 * @param positionCount number of positions to translate
 * @param dictionaryBlock block supplying the current id of each position
 * @param remapIndex table from current id to new id; {@code -1} marks an id with no mapping
 * @return the new id for each position
 * @throws IllegalStateException if a position references an id mapped to {@code -1}
 */
private static int[] getNewIds(int positionCount, DictionaryBlock dictionaryBlock, int[] remapIndex)
{
    int[] remapped = new int[positionCount];
    int position = 0;
    while (position < positionCount) {
        int mapped = remapIndex[dictionaryBlock.getId(position)];
        if (mapped != -1) {
            remapped[position] = mapped;
            position++;
            continue;
        }
        throw new IllegalStateException("reference to a non-existent key");
    }
    return remapped;
}
// Excerpt from a test body (the enclosing method header is not visible here).
// Starting from a compacted block, it applies getPositions repeatedly and asserts isCompact()
// after each step, calling compact() again after each step where the block is asserted non-compact.
// Every getPositions call passes an explicit (offset, length) pair; where length is smaller than
// the positions array, that is the count handed to getPositions rather than the array length.
DictionaryBlock block = new DictionaryBlock(createSlicesBlock(createExpectedValues(10)), new int[] {0, 1, 2, 3, 4, 5}).compact(); block = (DictionaryBlock) block.getPositions(new int[] {3, 3, 4, 5, 2, 0, 1, 1}, 0, 7); assertTrue(block.isCompact()); block = (DictionaryBlock) block.getPositions(new int[] {0, 1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1}, 0, 12); assertTrue(block.isCompact()); block = (DictionaryBlock) block.getPositions(new int[] {0, 2, 0, 2, 0}, 0, 5); assertFalse(block.isCompact()); block = block.compact(); block = (DictionaryBlock) block.getPositions(new int[] {0, 1, 1, 1}, 0, 4); assertTrue(block.isCompact()); block = (DictionaryBlock) block.getPositions(new int[] {1, 1, 1, 1}, 0, 4); assertFalse(block.isCompact()); block = block.compact(); block = (DictionaryBlock) block.getPositions(new int[] {0}, 0, 1); assertTrue(block.isCompact()); block = (DictionaryBlock) block.getPositions(new int[] {}, 0, 0); assertFalse(block.isCompact()); block = block.compact();
// NOTE(review): this line looks like a truncated excerpt of compact(): opening/closing braces and
// the loop headers that would declare `i` are missing, so it is not compilable as shown.
// What is visible: an already-compact block returns itself; dictionary indexes whose remapIndex
// entry is -1 are added to dictionaryPositionsToCopy; an id that remaps to -1 raises
// IllegalStateException; and the result is a new DictionaryBlock constructed with `true` as its last argument.
public DictionaryBlock compact() if (isCompact()) { return this; int dictionaryIndex = getId(i); if (remapIndex[dictionaryIndex] == -1) { dictionaryPositionsToCopy.add(dictionaryIndex); int newId = remapIndex[getId(i)]; if (newId == -1) { throw new IllegalStateException("reference to a non-existent key"); return new DictionaryBlock(positionCount, compactDictionary, newIds, true);
// Excerpt from a test body (the enclosing method header and the declaration of expectedValues
// are not visible here). It builds a dictionary block over ids 0..5, checks its contents, captures
// the dictionary source id, and then asserts several times that the id is unchanged.
// NOTE(review): the repeated identical assertions suggest intervening statements were dropped
// from this excerpt; confirm against the full test.
Block dictionaryBlock = new DictionaryBlock(createSlicesBlock(expectedValues), new int[] {0, 1, 2, 3, 4, 5}); assertBlock(dictionaryBlock, TestDictionaryBlock::createBlockBuilder, new Slice[] {expectedValues[0], expectedValues[1], expectedValues[2], expectedValues[3], expectedValues[4], expectedValues[5]}); DictionaryId dictionaryId = ((DictionaryBlock) dictionaryBlock).getDictionarySourceId(); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId);
/**
 * Returns a block whose dictionary is the loaded form of this block's dictionary.
 * <p>
 * When the dictionary's loaded block is the dictionary itself, this block is returned
 * as is. Otherwise a new dictionary block is created over the same ids with the loaded
 * dictionary, a compact flag of {@code false}, and a freshly generated dictionary id.
 */
@Override
public Block getLoadedBlock()
{
    Block loaded = dictionary.getLoadedBlock();
    if (loaded != dictionary) {
        return new DictionaryBlock(idsOffset, getPositionCount(), loaded, ids, false, randomDictionaryId());
    }
    return this;
}
/**
 * Creates the work item for adding a page whose first grouping channel is dictionary-encoded.
 * <p>
 * The dictionary block is taken from {@code channels[0]} of the page, the dictionary
 * look-back state is updated for its dictionary, and a page with the extracted
 * dictionary is created.
 *
 * @param page the page to process; must not be null and must satisfy {@code canProcessDictionary}
 * @throws NullPointerException if {@code page} is null
 */
public AddDictionaryPageWork(Page page)
{
    // Null-check before the page is handed to canProcessDictionary, so a null argument
    // fails with the descriptive message instead of inside that check.
    this.page = requireNonNull(page, "page is null");
    verify(canProcessDictionary(page), "invalid call to addDictionaryPage");
    this.dictionaryBlock = (DictionaryBlock) page.getBlock(channels[0]);
    updateDictionaryLookBack(dictionaryBlock.getDictionary());
    this.dictionaryPage = createPageWithExtractedDictionary(page);
}
/**
 * Maps the block's dictionary source id to a dictionary id, generating a new one with
 * {@code randomDictionaryId()} the first time a given source id is seen and returning
 * the stored one afterwards.
 *
 * @param block block whose dictionary source id is looked up
 */
@Override
public DictionaryId apply(DictionaryBlock block)
{
    DictionaryId sourceId = block.getDictionarySourceId();
    return dictionarySourceIds.computeIfAbsent(sourceId, ignored -> randomDictionaryId());
}
/**
 * The size of a 100-position dictionary block must equal the size of its dictionary
 * plus one int per position.
 */
@Test
public void testSizeInBytes()
{
    Slice[] values = createExpectedValues(10);
    DictionaryBlock block = createDictionaryBlock(values, 100);

    assertEquals(
            block.getSizeInBytes(),
            block.getDictionary().getSizeInBytes() + (100 * SIZE_OF_INT));
}
/**
 * A block produced by {@code copyRegion} must report itself compact.
 */
@Test
public void testCopyRegionCreatesCompactBlock()
{
    Slice[] values = createExpectedValues(10);
    DictionaryBlock source = createDictionaryBlock(values, 100);

    Block region = source.copyRegion(1, 3);
    DictionaryBlock regionDictionaryBlock = (DictionaryBlock) region;

    assertTrue(regionDictionaryBlock.isCompact());
}
@Test public void testLogicalSizeInBytes() { // The 10 Slices in the array will be of lengths 0 to 9. Slice[] expectedValues = createExpectedValues(10); // The dictionary within the dictionary block is expected to be a VariableWidthBlock of size 95 bytes. // 45 bytes for the expectedValues Slices (sum of seq(0,9)) and 50 bytes for the position and isNull array (total 10 positions). DictionaryBlock dictionaryBlock = createDictionaryBlock(expectedValues, 100); assertEquals(dictionaryBlock.getDictionary().getLogicalSizeInBytes(), 95); // The 100 positions in the dictionary block index to 10 positions in the underlying dictionary (10 each). // Logical size calculation accounts for 4 bytes of offset and 1 byte of isNull. Therefore the expected unoptimized // size is 10 times the size of the underlying dictionary (VariableWidthBlock). assertEquals(dictionaryBlock.getLogicalSizeInBytes(), 95 * 10); // With alternating nulls, we have 21 positions, with the same size calculation as above. dictionaryBlock = createDictionaryBlock(alternatingNullValues(expectedValues), 210); assertEquals(dictionaryBlock.getDictionary().getPositionCount(), 21); assertEquals(dictionaryBlock.getDictionary().getLogicalSizeInBytes(), 150); // The null positions should be included in the logical size. assertEquals(dictionaryBlock.getLogicalSizeInBytes(), 150 * 10); }