/**
 * Returns a new slice holding the UTF-8 text of {@code value} with {@code "_"} appended.
 *
 * @param value the value to extend; cast to {@code Slice}
 * @return a slice containing the original text followed by an underscore
 */
@Override
protected Object getGreaterValue(Object value)
{
    Slice original = (Slice) value;
    String extended = original.toStringUtf8() + "_";
    return Slices.utf8Slice(extended);
}
}
/**
 * Decodes every row of {@code columnData} into a block created from {@code type}.
 * A row whose bytes equal {@code nullSequence} is appended as null; any other row is
 * passed through {@code base64Decoder} and written as a slice.
 *
 * @param columnData the raw column bytes plus per-row offsets and lengths
 * @return the built block, one position per row
 */
@Override
public Block decodeColumn(ColumnData columnData)
{
    int rowCount = columnData.rowCount();
    BlockBuilder output = type.createBlockBuilder(null, rowCount);
    Slice columnBytes = columnData.getSlice();

    for (int row = 0; row < rowCount; row++) {
        int start = columnData.getOffset(row);
        int byteCount = columnData.getLength(row);

        boolean matchesNullSequence = nullSequence.equals(0, nullSequence.length(), columnBytes, start, byteCount);
        if (matchesNullSequence) {
            output.appendNull();
            continue;
        }

        byte[] encoded = columnBytes.getBytes(start, byteCount);
        type.writeSlice(output, Slices.wrappedBuffer(base64Decoder.decode(encoded)));
    }
    return output.build();
}
/**
 * Splits {@code source} around every match of {@code re2jPattern}.
 * The text between consecutive matches is written as one VARCHAR entry each, and the
 * text after the last match (the whole input when nothing matches) is written last.
 *
 * @param source the slice to split
 * @return a VARCHAR block holding the pieces in order
 */
public Block split(Slice source)
{
    Matcher matcher = re2jPattern.matcher(source);
    BlockBuilder pieces = VARCHAR.createBlockBuilder(null, 32);

    int pieceStart = 0;
    while (matcher.find()) {
        int pieceLength = matcher.start() - pieceStart;
        Slice piece = source.slice(pieceStart, pieceLength);
        pieceStart = matcher.end();
        VARCHAR.writeSlice(pieces, piece);
    }

    // whatever follows the final match
    int tailLength = source.length() - pieceStart;
    VARCHAR.writeSlice(pieces, source.slice(pieceStart, tailLength));
    return pieces.build();
}
/**
 * Renders {@code json} as a string for use in an error message, cutting it to the first
 * {@code MAX_JSON_LENGTH_IN_ERROR_MESSAGE} bytes and appending {@code "...(truncated)"}
 * when it is longer than that limit.
 *
 * @param json the JSON bytes to render
 * @return the full text, or the truncated prefix followed by the truncation marker
 */
public static String truncateIfNecessaryForErrorMessage(Slice json)
{
    if (json.length() > MAX_JSON_LENGTH_IN_ERROR_MESSAGE) {
        Slice prefix = json.slice(0, MAX_JSON_LENGTH_IN_ERROR_MESSAGE);
        return prefix.toStringUtf8() + "...(truncated)";
    }
    return json.toStringUtf8();
}
private Slice dropStringMinMaxIfNecessary(Slice minOrMax) { if (minOrMax == null || minOrMax.length() > stringStatisticsLimitInBytes) { return null; } // Do not hold the entire slice where the actual stats could be small if (minOrMax.isCompact()) { return minOrMax; } return Slices.copyOf(minOrMax); } }
@Test public void testGetMinSlice() Slice minSlice = utf8Slice(""); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value); assertEquals(minStringTruncateToValidRange(value, ORIGINAL), minSlice); Slice prefix = utf8Slice("apple"); for (int codePoint = startCodePoint; codePoint < endCodePoint; codePoint++) { if (MIN_SURROGATE <= codePoint && codePoint <= MAX_SURROGATE) { if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value);
@Test public void testRoundTrip() { int positionCount = 40; // build dictionary BlockBuilder dictionaryBuilder = VARCHAR.createBlockBuilder(null, 4); VARCHAR.writeString(dictionaryBuilder, "alice"); VARCHAR.writeString(dictionaryBuilder, "bob"); VARCHAR.writeString(dictionaryBuilder, "charlie"); VARCHAR.writeString(dictionaryBuilder, "dave"); Block dictionary = dictionaryBuilder.build(); // build ids int[] ids = new int[positionCount]; for (int i = 0; i < 40; i++) { ids[i] = i % 4; } DictionaryBlock dictionaryBlock = new DictionaryBlock(dictionary, ids); DynamicSliceOutput sliceOutput = new DynamicSliceOutput(1024); blockEncodingSerde.writeBlock(sliceOutput, dictionaryBlock); Block actualBlock = blockEncodingSerde.readBlock(sliceOutput.slice().getInput()); assertTrue(actualBlock instanceof DictionaryBlock); DictionaryBlock actualDictionaryBlock = (DictionaryBlock) actualBlock; assertBlockEquals(VARCHAR, actualDictionaryBlock.getDictionary(), dictionary); for (int position = 0; position < actualDictionaryBlock.getPositionCount(); position++) { assertEquals(actualDictionaryBlock.getId(position), ids[position]); } assertEquals(actualDictionaryBlock.getDictionarySourceId(), dictionaryBlock.getDictionarySourceId()); }
@Test public void testCopyStatsToSaveMemory() { StringStatisticsBuilder statisticsBuilder = new StringStatisticsBuilder(Integer.MAX_VALUE); Slice shortSlice = Slices.wrappedBuffer(LONG_BOTTOM_VALUE.getBytes(), 0, 1); statisticsBuilder.addValue(shortSlice); Slice stats = statisticsBuilder.buildColumnStatistics().getStringStatistics().getMax(); // assert we only spend 1 byte for stats assertNotNull(stats); assertEquals(stats.getRetainedSize(), Slices.wrappedBuffer(new byte[1]).getRetainedSize()); }
/**
 * Reads the record set for split ("test", "schema", "table", dataUri) with a varchar
 * "text" column and a bigint "value" column, collects the rows into an insertion-ordered
 * map, and compares it with the three expected entries.
 */
@Test
public void testGetRecordSet()
{
    ExampleRecordSetProvider provider = new ExampleRecordSetProvider(new ExampleConnectorId("test"));
    ExampleSplit split = new ExampleSplit("test", "schema", "table", dataUri);

    RecordSet recordSet = provider.getRecordSet(
            ExampleTransactionHandle.INSTANCE,
            SESSION,
            split,
            ImmutableList.of(
                    new ExampleColumnHandle("test", "text", createUnboundedVarcharType(), 0),
                    new ExampleColumnHandle("test", "value", BIGINT, 1)));
    assertNotNull(recordSet, "recordSet is null");

    RecordCursor cursor = recordSet.cursor();
    assertNotNull(cursor, "cursor is null");

    // column 0 is the text key, column 1 the numeric value
    Map<String, Long> rows = new LinkedHashMap<>();
    while (cursor.advanceNextPosition()) {
        String text = cursor.getSlice(0).toStringUtf8();
        long value = cursor.getLong(1);
        rows.put(text, value);
    }

    Map<String, Long> expectedRows = ImmutableMap.of(
            "ten", 10L,
            "eleven", 11L,
            "twelve", 12L);
    assertEquals(rows, expectedRows);
}
protected void assertSlicePosition(Block block, int position, Slice expectedSliceValue) { int length = block.getSliceLength(position); assertEquals(length, expectedSliceValue.length()); Block expectedBlock = toSingeValuedBlock(expectedSliceValue); for (int offset = 0; offset < length - 3; offset++) { assertEquals(block.getSlice(position, offset, 3), expectedSliceValue.slice(offset, 3)); assertTrue(block.bytesEqual(position, offset, expectedSliceValue, offset, 3)); // if your tests fail here, please change your test to not use this value assertFalse(block.bytesEqual(position, offset, Slices.utf8Slice("XXX"), 0, 3)); assertEquals(block.bytesCompare(position, offset, 3, expectedSliceValue, offset, 3), 0); assertTrue(block.bytesCompare(position, offset, 3, expectedSliceValue, offset, 2) > 0); Slice greaterSlice = createGreaterValue(expectedSliceValue, offset, 3); assertTrue(block.bytesCompare(position, offset, 3, greaterSlice, 0, greaterSlice.length()) < 0); assertTrue(block.equals(position, offset, expectedBlock, 0, offset, 3)); assertEquals(block.compareTo(position, offset, 3, expectedBlock, 0, offset, 3), 0); BlockBuilder blockBuilder = VARBINARY.createBlockBuilder(null, 1); block.writeBytesTo(position, offset, 3, blockBuilder); blockBuilder.closeEntry(); Block segment = blockBuilder.build(); assertTrue(block.equals(position, offset, segment, 0, 0, 3)); } }
/**
 * Checks that {@code VarbinaryOperators.hashCode} on a slice of {@code ALL_BYTES} equals
 * {@code VARBINARY.hash} on a single-entry block holding the same bytes.
 */
@Test
public void testHashCode()
{
    Slice data = Slices.wrappedBuffer(ALL_BYTES);

    // single-entry block containing exactly the bytes of data
    Block block = VARBINARY.createBlockBuilder(null, 1, ALL_BYTES.length)
            .writeBytes(data, 0, data.length())
            .closeEntry()
            .build();

    long operatorHash = VarbinaryOperators.hashCode(data);
    long typeHash = VARBINARY.hash(block, 0);
    assertEquals(operatorHash, typeHash);
}
/**
 * Writes a four-entry VARCHAR block through {@code blockEncodingSerde}, reads it back,
 * and checks the result equals the block that was written.
 */
@Test
public void testRoundTrip()
{
    BlockBuilder builder = VARCHAR.createBlockBuilder(null, 4);
    for (String name : new String[] {"alice", "bob", "charlie", "dave"}) {
        VARCHAR.writeString(builder, name);
    }
    Block expectedBlock = builder.build();

    DynamicSliceOutput serialized = new DynamicSliceOutput(1024);
    blockEncodingSerde.writeBlock(serialized, expectedBlock);

    Block actualBlock = blockEncodingSerde.readBlock(serialized.slice().getInput());
    assertBlockEquals(VARCHAR, actualBlock, expectedBlock);
}
/**
 * Registers the ML plugin functions, runs the {@code evaluate_classifier_predictions}
 * aggregation over the page from {@code getPage()}, and checks that the textual result
 * has seven non-empty lines, the first being the accuracy summary.
 */
@Test
public void testEvaluateClassifierPredictions()
{
    metadata.addFunctions(extractFunctions(new MLPlugin().getFunctions()));

    Signature signature = new Signature(
            "evaluate_classifier_predictions",
            AGGREGATE,
            parseTypeSignature(StandardTypes.VARCHAR),
            parseTypeSignature(StandardTypes.BIGINT),
            parseTypeSignature(StandardTypes.BIGINT));
    InternalAggregationFunction aggregation = metadata.getFunctionRegistry().getAggregateFunctionImplementation(signature);

    Accumulator accumulator = aggregation.bind(ImmutableList.of(0, 1), Optional.empty()).createAccumulator();
    accumulator.addInput(getPage());

    BlockBuilder resultBuilder = accumulator.getFinalType().createBlockBuilder(null, 1);
    accumulator.evaluateFinal(resultBuilder);
    Block resultBlock = resultBuilder.build();

    // the aggregation result is a newline-separated text report
    String output = VARCHAR.getSlice(resultBlock, 0).toStringUtf8();
    Splitter lineSplitter = Splitter.on('\n').omitEmptyStrings();
    List<String> parts = ImmutableList.copyOf(lineSplitter.split(output));

    assertEquals(parts.size(), 7, output);
    assertEquals(parts.get(0), "Accuracy: 1/2 (50.00%)");
}
/**
 * Checks that every position of a run-length-encoded block reports an estimated data
 * size equal to the length of the single repeated value.
 */
@Test
public void testEstimatedDataSizeForStats()
{
    int positionCount = 10;
    Slice expectedValue = createExpectedValue(5);
    Block singleValue = createSingleValueBlock(expectedValue);
    Block block = new RunLengthEncodedBlock(singleValue, positionCount);

    int position = 0;
    while (position < positionCount) {
        assertEquals(block.getEstimatedDataSizeForStats(position), expectedValue.length());
        position++;
    }
}
/**
 * Checks that each of the 100 positions of a dictionary block built over 10 expected
 * values reports an estimated data size equal to the length of
 * {@code expectedValues[position % 10]}.
 */
@Test
public void testEstimatedDataSizeForStats()
{
    int positionCount = 10;
    int dictionaryPositionCount = 100;

    Slice[] expectedValues = createExpectedValues(positionCount);
    DictionaryBlock dictionaryBlock = createDictionaryBlock(expectedValues, dictionaryPositionCount);

    for (int position = 0; position < dictionaryPositionCount; position++) {
        Slice expectedValue = expectedValues[position % positionCount];
        assertEquals(dictionaryBlock.getEstimatedDataSizeForStats(position), expectedValue.length());
    }
}
/**
 * Writes {@code BOOLEAN} with {@code writeType}, reads it back with {@code readType}
 * using a {@code TestingTypeManager}, and checks the same type comes out.
 */
@Test
public void testRoundTrip()
{
    DynamicSliceOutput serialized = new DynamicSliceOutput(1024);
    writeType(serialized, BOOLEAN);

    TestingTypeManager typeManager = new TestingTypeManager();
    Type roundTripped = readType(typeManager, serialized.slice().getInput());

    assertEquals(roundTripped, BOOLEAN);
}
}
/**
 * Asserts that the first {@code byteCountWithoutTrailingSpace(...)} bytes of
 * {@code actual}, starting at {@code offset}, equal {@code expected}.
 *
 * @param actual the bytes to examine
 * @param offset start of the examined range within {@code actual}
 * @param length length of the examined range
 * @param expected the bytes expected once the range is cut to the computed count
 */
private static void assertByteCountWithoutTrailingSpace(byte[] actual, int offset, int length, byte[] expected)
{
    Slice wrapped = wrappedBuffer(actual);
    byte[] trimmed = wrapped.getBytes(offset, byteCountWithoutTrailingSpace(wrapped, offset, length));
    assertEquals(trimmed, expected);
}
@Test private void testSplitPageNonDecreasingPageSize() { int positionCount = 100; int maxPageSizeInBytes = 1; List<Type> types = ImmutableList.of(VARCHAR); Slice expectedValue = wrappedBuffer("test".getBytes()); BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, 1, expectedValue.length()); blockBuilder.writeBytes(expectedValue, 0, expectedValue.length()).closeEntry(); Block rleBlock = new RunLengthEncodedBlock(blockBuilder.build(), positionCount); Page initialPage = new Page(rleBlock); List<Page> pages = splitPage(initialPage, maxPageSizeInBytes); // the page should only be split in half as the recursion should terminate // after seeing that the size of the Page doesn't decrease assertEquals(pages.size(), 2); Page first = pages.get(0); Page second = pages.get(1); // the size of the pages will remain the same and should be greater than the maxPageSizeInBytes assertGreaterThan((int) first.getSizeInBytes(), maxPageSizeInBytes); assertGreaterThan((int) second.getSizeInBytes(), maxPageSizeInBytes); assertPositionCount(pages, positionCount); MaterializedResult actual = toMaterializedResult(TEST_SESSION, types, pages); MaterializedResult expected = toMaterializedResult(TEST_SESSION, types, ImmutableList.of(initialPage)); assertEquals(actual, expected); } }
@Test public void smokedTest() throws Exception { assertExecute("cast(true as boolean)", BOOLEAN, true); assertExecute("true", BOOLEAN, true); assertExecute("false", BOOLEAN, false); assertExecute("42", INTEGER, 42); assertExecute("'foo'", createVarcharType(3), "foo"); assertExecute("4.2E0", DOUBLE, 4.2); assertExecute("10000000000 + 1", BIGINT, 10000000001L); assertExecute("4.2", createDecimalType(2, 1), new SqlDecimal(BigInteger.valueOf(42), 2, 1)); assertExecute("DECIMAL '4.2'", createDecimalType(2, 1), new SqlDecimal(BigInteger.valueOf(42), 2, 1)); assertExecute("X' 1 f'", VARBINARY, new SqlVarbinary(Slices.wrappedBuffer((byte) 0x1f).getBytes())); assertExecute("X' '", VARBINARY, new SqlVarbinary(new byte[0])); assertExecute("bound_integer", INTEGER, 1234); assertExecute("bound_long", BIGINT, 1234L); assertExecute("bound_string", VARCHAR, "hello"); assertExecute("bound_double", DOUBLE, 12.34); assertExecute("bound_boolean", BOOLEAN, true); assertExecute("bound_timestamp", BIGINT, new DateTime(2001, 8, 22, 3, 4, 5, 321, UTC).getMillis()); assertExecute("bound_pattern", VARCHAR, "%el%"); assertExecute("bound_null_string", VARCHAR, null); assertExecute("bound_timestamp_with_timezone", TIMESTAMP_WITH_TIME_ZONE, new SqlTimestampWithTimeZone(new DateTime(1970, 1, 1, 0, 1, 0, 999, DateTimeZone.UTC).getMillis(), TimeZoneKey.getTimeZoneKey("Z"))); assertExecute("bound_binary_literal", VARBINARY, new SqlVarbinary(new byte[] {(byte) 0xab})); // todo enable when null output type is supported // assertExecute("null", null); Futures.allAsList(futures).get(); }