/**
 * Returns a short textual form of this block that contains only its position count,
 * formatted as {@code DictionaryBlock{positionCount=N}}.
 */
@Override
public String toString()
{
    return "DictionaryBlock{" + "positionCount=" + getPositionCount() + '}';
}
/**
 * Translates a selection expressed over dictionary entries into a selection over
 * the positions of {@code dictionaryBlock}.
 *
 * @param dictionaryBlock the block whose positions are being selected
 * @param selectedDictionaryPositions flags indexed by dictionary id; a position is
 *        selected when the flag for its id is {@code true}
 * @return a range starting at 0 when no position or every position is selected,
 *         otherwise an explicit list of the selected positions in ascending order
 */
private static SelectedPositions selectDictionaryPositions(DictionaryBlock dictionaryBlock, boolean[] selectedDictionaryPositions)
{
    int positionCount = dictionaryBlock.getPositionCount();

    // First pass: count the positions whose dictionary entry is selected.
    int matches = 0;
    for (int i = 0; i < positionCount; i++) {
        if (selectedDictionaryPositions[dictionaryBlock.getId(i)]) {
            matches++;
        }
    }

    // Nothing or everything matched: a plain range is enough, no list is needed.
    boolean noneSelected = matches == 0;
    boolean allSelected = matches == positionCount;
    if (noneSelected || allSelected) {
        return SelectedPositions.positionsRange(0, matches);
    }

    // Second pass: record the matching positions in order.
    int[] selected = new int[matches];
    int next = 0;
    for (int i = 0; i < positionCount; i++) {
        if (selectedDictionaryPositions[dictionaryBlock.getId(i)]) {
            selected[next++] = i;
        }
    }
    return SelectedPositions.positionsList(selected, 0, matches);
}
/**
 * Asserts that {@code dictionaryBlock} has exactly {@code expected.length} positions
 * and that the dictionary id at every position equals the corresponding expected id.
 *
 * @param dictionaryBlock the block under test
 * @param expected the expected dictionary id for each position, in position order
 */
private static void assertDictionaryIds(DictionaryBlock dictionaryBlock, int... expected)
{
    int positionCount = dictionaryBlock.getPositionCount();
    assertEquals(positionCount, expected.length);

    int index = 0;
    while (index < positionCount) {
        assertEquals(dictionaryBlock.getId(index), expected[index]);
        index++;
    }
}
}
/**
 * Returns a block whose dictionary is the result of {@code dictionary.getLoadedBlock()}.
 *
 * @return this block when the loaded dictionary is the very same instance as the
 *         current dictionary; otherwise a new {@code DictionaryBlock} that reuses
 *         this block's ids, ids offset and position count, wraps the loaded
 *         dictionary, and takes its id from {@code randomDictionaryId()}
 */
@Override
public Block getLoadedBlock()
{
    Block loaded = dictionary.getLoadedBlock();
    // Identity comparison: only an unchanged dictionary instance lets us return this block as is.
    return (loaded == dictionary)
            ? this
            : new DictionaryBlock(idsOffset, getPositionCount(), loaded, ids, false, randomDictionaryId());
}
@Override public long getLogicalSizeInBytes() { if (logicalSizeInBytes >= 0) { return logicalSizeInBytes; } // Calculation of logical size can be performed as part of calculateCompactSize() with minor modifications. // Keeping this calculation separate as this is a little more expensive and may not be called as often. long sizeInBytes = 0; long[] seenSizes = new long[dictionary.getPositionCount()]; Arrays.fill(seenSizes, -1L); for (int i = 0; i < getPositionCount(); i++) { int position = getId(i); if (seenSizes[position] < 0) { seenSizes[position] = dictionary.getRegionSizeInBytes(position, 1); } sizeInBytes += seenSizes[position]; } logicalSizeInBytes = sizeInBytes; return sizeInBytes; }
@Override public long getRegionSizeInBytes(int positionOffset, int length) { if (positionOffset == 0 && length == getPositionCount()) { // Calculation of getRegionSizeInBytes is expensive in this class. // On the other hand, getSizeInBytes result is cached. return getSizeInBytes(); } boolean[] used = new boolean[dictionary.getPositionCount()]; for (int i = positionOffset; i < positionOffset + length; i++) { used[getId(i)] = true; } return dictionary.getPositionsSizeInBytes(used) + Integer.BYTES * (long) length; }
private static ColumnarArray toColumnarArray(DictionaryBlock dictionaryBlock) { ColumnarArray columnarArray = toColumnarArray(dictionaryBlock.getDictionary()); // build new offsets int[] offsets = new int[dictionaryBlock.getPositionCount() + 1]; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); offsets[position + 1] = offsets[position] + columnarArray.getLength(dictionaryId); } // reindex dictionary int[] dictionaryIds = new int[offsets[dictionaryBlock.getPositionCount()]]; int nextDictionaryIndex = 0; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); int length = columnarArray.getLength(dictionaryId); // adjust to the element block start offset int startOffset = columnarArray.getOffset(dictionaryId) - columnarArray.getOffset(0); for (int entryIndex = 0; entryIndex < length; entryIndex++) { dictionaryIds[nextDictionaryIndex] = startOffset + entryIndex; nextDictionaryIndex++; } } return new ColumnarArray( dictionaryBlock, 0, offsets, new DictionaryBlock(dictionaryIds.length, columnarArray.getElementsBlock(), dictionaryIds)); }
@Test public void testCompactAllKeysReferenced() { Slice[] expectedValues = createExpectedValues(5); DictionaryBlock dictionaryBlock = createDictionaryBlock(expectedValues, 10); DictionaryBlock compactBlock = dictionaryBlock.compact(); // When there is nothing to compact, we return the same block assertEquals(compactBlock.getDictionary(), dictionaryBlock.getDictionary()); assertEquals(compactBlock.getPositionCount(), dictionaryBlock.getPositionCount()); for (int position = 0; position < compactBlock.getPositionCount(); position++) { assertEquals(compactBlock.getId(position), dictionaryBlock.getId(position)); } assertEquals(compactBlock.isCompact(), true); }
// Dictionary backing the first dictionary block.
Block dictionary = firstDictionaryBlock.getDictionary();
// Position count of the first dictionary block itself.
int positionCount = firstDictionaryBlock.getPositionCount();
// Position count of that block's dictionary.
int dictionarySize = dictionary.getPositionCount();
private static ColumnarMap toColumnarMap(DictionaryBlock dictionaryBlock) { ColumnarMap columnarMap = toColumnarMap(dictionaryBlock.getDictionary()); // build new offsets int[] offsets = new int[dictionaryBlock.getPositionCount() + 1]; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); offsets[position + 1] = offsets[position] + columnarMap.getEntryCount(dictionaryId); } // reindex dictionary int[] dictionaryIds = new int[offsets[dictionaryBlock.getPositionCount()]]; int nextDictionaryIndex = 0; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); int entryCount = columnarMap.getEntryCount(dictionaryId); // adjust to the element block start offset int startOffset = columnarMap.getOffset(dictionaryId) - columnarMap.getOffset(0); for (int entryIndex = 0; entryIndex < entryCount; entryIndex++) { dictionaryIds[nextDictionaryIndex] = startOffset + entryIndex; nextDictionaryIndex++; } } return new ColumnarMap( dictionaryBlock, 0, offsets, new DictionaryBlock(dictionaryIds.length, columnarMap.getKeysBlock(), dictionaryIds), new DictionaryBlock(dictionaryIds.length, columnarMap.getValuesBlock(), dictionaryIds)); }
/**
 * Copies four positions out of a block built from a single expected value and checks
 * that the copy has four positions and a dictionary equal to the expected values.
 */
@Test
public void testCopyPositionsNoCompaction()
{
    Slice[] expectedValues = createExpectedValues(1);
    DictionaryBlock source = createDictionaryBlock(expectedValues, 100);

    int[] positionsToCopy = {0, 2, 4, 5};
    int copyLength = positionsToCopy.length;
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(positionsToCopy, 0, copyLength);

    assertEquals(copy.getPositionCount(), copyLength);
    assertBlock(copy.getDictionary(), TestDictionaryBlock::createBlockBuilder, expectedValues);
}
/**
 * Copies positions in a non-ascending order and checks that the copy's dictionary
 * holds only {@code expectedValues[0]} and {@code expectedValues[5]}, with the ids
 * alternating 0, 1, 0, 1, 0.
 */
@Test
public void testCopyPositionsWithCompactionsAndReorder()
{
    Slice[] expectedValues = createExpectedValues(10);
    DictionaryBlock source = createDictionaryBlock(expectedValues, 100);

    int[] positionsToCopy = {50, 55, 40, 45, 60};
    int copyLength = positionsToCopy.length;
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(positionsToCopy, 0, copyLength);

    assertEquals(copy.getDictionary().getPositionCount(), 2);
    assertEquals(copy.getPositionCount(), copyLength);

    Slice[] expectedDictionary = {expectedValues[0], expectedValues[5]};
    assertBlock(copy.getDictionary(), TestDictionaryBlock::createBlockBuilder, expectedDictionary);
    assertDictionaryIds(copy, 0, 1, 0, 1, 0);
}
/**
 * Copies the same position three times and checks that the copy has three positions,
 * a one-entry dictionary equal to {@code expectedValues[2]}, and ids that are all 0.
 */
@Test
public void testCopyPositionsSamePosition()
{
    Slice[] expectedValues = createExpectedValues(10);
    DictionaryBlock source = createDictionaryBlock(expectedValues, 100);

    int[] positionsToCopy = {52, 52, 52};
    int copyLength = positionsToCopy.length;
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(positionsToCopy, 0, copyLength);

    assertEquals(copy.getDictionary().getPositionCount(), 1);
    assertEquals(copy.getPositionCount(), copyLength);

    Slice[] expectedDictionary = {expectedValues[2]};
    assertBlock(copy.getDictionary(), TestDictionaryBlock::createBlockBuilder, expectedDictionary);
    assertDictionaryIds(copy, 0, 0, 0);
}
/**
 * Copies five positions and checks that the copy's dictionary is reduced to the
 * single value {@code expectedValues[0]} and that every copied position reads back
 * as that value.
 */
@Test
public void testCopyPositionsWithCompaction()
{
    Slice[] expectedValues = createExpectedValues(10);
    Slice firstExpectedValue = expectedValues[0];
    DictionaryBlock source = createDictionaryBlock(expectedValues, 100);

    int[] positionsToCopy = {0, 10, 20, 30, 40};
    int copyLength = positionsToCopy.length;
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(positionsToCopy, 0, copyLength);

    assertEquals(copy.getDictionary().getPositionCount(), 1);
    assertEquals(copy.getPositionCount(), copyLength);
    assertBlock(copy.getDictionary(), TestDictionaryBlock::createBlockBuilder, new Slice[] {firstExpectedValue});

    // Every copied position is expected to hold the first value.
    Slice[] expectedCopy = new Slice[copyLength];
    for (int i = 0; i < copyLength; i++) {
        expectedCopy[i] = firstExpectedValue;
    }
    assertBlock(copy, TestDictionaryBlock::createBlockBuilder, expectedCopy);
}
private static ColumnarRow toColumnarRow(DictionaryBlock dictionaryBlock) { // build a mapping from the old dictionary to a new dictionary with nulls removed Block dictionary = dictionaryBlock.getDictionary(); int[] newDictionaryIndex = new int[dictionary.getPositionCount()]; int nextNewDictionaryIndex = 0; for (int position = 0; position < dictionary.getPositionCount(); position++) { if (!dictionary.isNull(position)) { newDictionaryIndex[position] = nextNewDictionaryIndex; nextNewDictionaryIndex++; } } // reindex the dictionary int[] dictionaryIds = new int[dictionaryBlock.getPositionCount()]; int nonNullPositionCount = 0; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { if (!dictionaryBlock.isNull(position)) { int oldDictionaryId = dictionaryBlock.getId(position); dictionaryIds[nonNullPositionCount] = newDictionaryIndex[oldDictionaryId]; nonNullPositionCount++; } } ColumnarRow columnarRow = toColumnarRow(dictionaryBlock.getDictionary()); Block[] fields = new Block[columnarRow.getFieldCount()]; for (int i = 0; i < columnarRow.getFieldCount(); i++) { fields[i] = new DictionaryBlock(nonNullPositionCount, columnarRow.getField(i), dictionaryIds); } return new ColumnarRow(dictionaryBlock, fields); }
@Override public void writeBlock(BlockEncodingSerde blockEncodingSerde, SliceOutput sliceOutput, Block block) { // The down casts here are safe because it is the block itself the provides this encoding implementation. DictionaryBlock dictionaryBlock = (DictionaryBlock) block; dictionaryBlock = dictionaryBlock.compact(); // positionCount int positionCount = dictionaryBlock.getPositionCount(); sliceOutput.appendInt(positionCount); // dictionary Block dictionary = dictionaryBlock.getDictionary(); blockEncodingSerde.writeBlock(sliceOutput, dictionary); // ids sliceOutput.writeBytes(dictionaryBlock.getIds()); // instance id sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getMostSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getLeastSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getSequenceId()); }
@Test public void testRoundTrip() { int positionCount = 40; // build dictionary BlockBuilder dictionaryBuilder = VARCHAR.createBlockBuilder(null, 4); VARCHAR.writeString(dictionaryBuilder, "alice"); VARCHAR.writeString(dictionaryBuilder, "bob"); VARCHAR.writeString(dictionaryBuilder, "charlie"); VARCHAR.writeString(dictionaryBuilder, "dave"); Block dictionary = dictionaryBuilder.build(); // build ids int[] ids = new int[positionCount]; for (int i = 0; i < 40; i++) { ids[i] = i % 4; } DictionaryBlock dictionaryBlock = new DictionaryBlock(dictionary, ids); DynamicSliceOutput sliceOutput = new DynamicSliceOutput(1024); blockEncodingSerde.writeBlock(sliceOutput, dictionaryBlock); Block actualBlock = blockEncodingSerde.readBlock(sliceOutput.slice().getInput()); assertTrue(actualBlock instanceof DictionaryBlock); DictionaryBlock actualDictionaryBlock = (DictionaryBlock) actualBlock; assertBlockEquals(VARCHAR, actualDictionaryBlock.getDictionary(), dictionary); for (int position = 0; position < actualDictionaryBlock.getPositionCount(); position++) { assertEquals(actualDictionaryBlock.getId(position), ids[position]); } assertEquals(actualDictionaryBlock.getDictionarySourceId(), dictionaryBlock.getDictionarySourceId()); }
/**
 * Returns a short textual form of this block that contains only its position count,
 * formatted as {@code DictionaryBlock{positionCount=N}}.
 */
@Override
public String toString()
{
    return "DictionaryBlock{" + "positionCount=" + getPositionCount() + '}';
}
/**
 * Returns a block whose dictionary is the result of {@code dictionary.getLoadedBlock()}.
 *
 * @return this block when the loaded dictionary is the very same instance as the
 *         current dictionary; otherwise a new {@code DictionaryBlock} that reuses
 *         this block's ids, ids offset and position count, wraps the loaded
 *         dictionary, and takes its id from {@code randomDictionaryId()}
 */
@Override
public Block getLoadedBlock()
{
    Block loaded = dictionary.getLoadedBlock();
    // Identity comparison: only an unchanged dictionary instance lets us return this block as is.
    return (loaded == dictionary)
            ? this
            : new DictionaryBlock(idsOffset, getPositionCount(), loaded, ids, false, randomDictionaryId());
}
/**
 * Copies five positions given as a list and checks that the copy has five positions
 * and a one-entry dictionary equal to the first expected value.
 */
@Test
public void testCopyPositionsWithCompaction()
        throws Exception
{
    Slice[] expectedValues = createExpectedValues(10);
    DictionaryBlock source = createDictionaryBlock(expectedValues, 100);

    List<Integer> positionsToCopy = Ints.asList(0, 10, 20, 30, 40);
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(positionsToCopy);

    assertEquals(copy.getDictionary().getPositionCount(), 1);
    assertEquals(copy.getPositionCount(), positionsToCopy.size());

    // Only the first expected value should remain in the copied dictionary.
    Slice[] expectedDictionary = Arrays.copyOfRange(expectedValues, 0, 1);
    assertBlock(copy.getDictionary(), expectedDictionary);
}