/**
 * Converts a Unicode code point into its UTF-8 encoded string.
 *
 * <p>The {@code long} argument is first narrowed with {@code Ints.saturatedCast}, so values
 * outside the {@code int} range are clamped rather than wrapped before being encoded.
 *
 * @param codepoint the code point to encode
 * @return the UTF-8 bytes produced by {@code SliceUtf8.codePointToUtf8}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when the encoder rejects
 *         the value with an {@code InvalidCodePointException}
 */
@Description("convert Unicode code point to a string")
@ScalarFunction
@SqlType("varchar(1)")
public static Slice chr(@SqlType(StandardTypes.BIGINT) long codepoint)
{
    int narrowed = Ints.saturatedCast(codepoint);
    try {
        return SliceUtf8.codePointToUtf8(narrowed);
    }
    catch (InvalidCodePointException invalid) {
        // Report the caller's original long value, not the clamped int
        String message = "Not a valid Unicode code point: " + codepoint;
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, message, invalid);
    }
}
continue; Slice value = codePointToUtf8(codePoint); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value); continue; Slice value = concatSlice(prefix, codePointToUtf8(codePoint)); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value);
private static void testMinStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(minStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(minStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(minStringTruncateToValidRange(value, ORIGINAL), prefix); } else { // truncate in suffix (if at all) assertEquals(minStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, minStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
continue; Slice value = codePointToUtf8(codePoint); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), value); continue; Slice value = concatSlice(prefix, codePointToUtf8(codePoint)); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), value);
private static void testMaxStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(maxStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(maxStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, wrappedBuffer((byte) 0xFF))); } else { // truncate in suffix (if at all) assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, maxStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
Slice codePoint = codePointToUtf8(testCodePoint); for (Slice suffix : ALL_UTF8_SEQUENCES) { Slice testValue = concatSlice(prefix, codePoint, suffix);
Slice codePoint = codePointToUtf8(testCodePoint); for (Slice suffix : ALL_UTF8_SEQUENCES) { Slice testValue = concatSlice(prefix, codePoint, suffix);
/**
 * Converts a Unicode code point into its UTF-8 encoded string.
 *
 * <p>The {@code long} argument is first narrowed with {@code Ints.saturatedCast}, so values
 * outside the {@code int} range are clamped rather than wrapped before being encoded.
 *
 * @param codepoint the code point to encode
 * @return the UTF-8 bytes produced by {@code SliceUtf8.codePointToUtf8}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when the encoder rejects
 *         the value with an {@code InvalidCodePointException}
 */
@Description("convert Unicode code point to a string")
@ScalarFunction
@SqlType("varchar(1)")
public static Slice chr(@SqlType(StandardTypes.BIGINT) long codepoint)
{
    int narrowed = Ints.saturatedCast(codepoint);
    try {
        return SliceUtf8.codePointToUtf8(narrowed);
    }
    catch (InvalidCodePointException invalid) {
        // Report the caller's original long value, not the clamped int
        String message = "Not a valid Unicode code point: " + codepoint;
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, message, invalid);
    }
}
/**
 * Converts a Unicode code point into its UTF-8 encoded string.
 *
 * <p>The {@code long} argument is first narrowed with {@code Ints.saturatedCast}, so values
 * outside the {@code int} range are clamped rather than wrapped before being encoded.
 *
 * @param codepoint the code point to encode
 * @return the UTF-8 bytes produced by {@code SliceUtf8.codePointToUtf8}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when the encoder rejects
 *         the value with an {@code InvalidCodePointException}
 */
@Description("convert Unicode code point to a string")
@ScalarFunction
@SqlType("varchar(1)")
public static Slice chr(@SqlType(StandardTypes.BIGINT) long codepoint)
{
    int narrowed = Ints.saturatedCast(codepoint);
    try {
        return SliceUtf8.codePointToUtf8(narrowed);
    }
    catch (InvalidCodePointException invalid) {
        // Report the caller's original long value, not the clamped int
        String message = "Not a valid Unicode code point: " + codepoint;
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, message, invalid);
    }
}
/**
 * Converts a Unicode code point into its UTF-8 encoded string.
 *
 * <p>The {@code long} argument is first narrowed with {@code Ints.saturatedCast}, so values
 * outside the {@code int} range are clamped rather than wrapped before being encoded.
 *
 * @param codepoint the code point to encode
 * @return the UTF-8 bytes produced by {@code SliceUtf8.codePointToUtf8}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when the encoder rejects
 *         the value with an {@code InvalidCodePointException}
 */
@Description("convert Unicode code point to a string")
@ScalarFunction
@SqlType("varchar(1)")
public static Slice chr(@SqlType(StandardTypes.BIGINT) long codepoint)
{
    int narrowed = Ints.saturatedCast(codepoint);
    try {
        return SliceUtf8.codePointToUtf8(narrowed);
    }
    catch (InvalidCodePointException invalid) {
        // Report the caller's original long value, not the clamped int
        String message = "Not a valid Unicode code point: " + codepoint;
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, message, invalid);
    }
}
continue; Slice value = codePointToUtf8(codePoint); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value); continue; Slice value = concatSlice(prefix, codePointToUtf8(codePoint)); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value);
private static void testMinStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(minStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(minStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(minStringTruncateToValidRange(value, ORIGINAL), prefix); } else { // truncate in suffix (if at all) assertEquals(minStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, minStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
private static void testMinStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(minStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(minStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(minStringTruncateToValidRange(value, ORIGINAL), prefix); } else { // truncate in suffix (if at all) assertEquals(minStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, minStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
private static void testMinStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(minStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(minStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(minStringTruncateToValidRange(value, ORIGINAL), prefix); } else { // truncate in suffix (if at all) assertEquals(minStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, minStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
/**
 * Exercises the code point length helpers.
 *
 * <p>First checks {@code lengthOfCodePointFromStartByte} against the four start-byte
 * constants. Then, for every entry of {@code ALL_CODE_POINTS}, encodes the code point with
 * the JDK and checks that the length, decode and encode helpers agree with that encoding.
 * Finally, for every entry of {@code INVALID_SEQUENCES}, checks that
 * {@code lengthOfCodePointSafe} reports the sequence's own length when the sequence stands
 * alone, follows an {@code 'x'} byte, or is followed by an {@code 'x'} byte.
 */
@Test
public void testLengthOfCodePoint()
{
    assertEquals(lengthOfCodePointFromStartByte(START_1_BYTE), 1);
    assertEquals(lengthOfCodePointFromStartByte(START_2_BYTE), 2);
    assertEquals(lengthOfCodePointFromStartByte(START_3_BYTE), 3);
    assertEquals(lengthOfCodePointFromStartByte(START_4_BYTE), 4);

    for (int codePoint : ALL_CODE_POINTS) {
        String text = new String(new int[] {codePoint}, 0, 1);
        assertEquals(text.codePoints().count(), 1);

        // The JDK's UTF-8 encoding is the reference for every helper below
        Slice reference = wrappedBuffer(text.getBytes(UTF_8));
        int referenceLength = reference.length();

        assertEquals(lengthOfCodePoint(codePoint), referenceLength);
        assertEquals(lengthOfCodePoint(reference, 0), referenceLength);
        assertEquals(lengthOfCodePointSafe(reference, 0), referenceLength);
        assertEquals(lengthOfCodePointFromStartByte(reference.getByte(0)), referenceLength);

        assertEquals(getCodePointAt(reference, 0), codePoint);
        assertEquals(getCodePointBefore(reference, referenceLength), codePoint);

        assertEquals(codePointToUtf8(codePoint), reference);
    }

    for (byte[] sequence : INVALID_SEQUENCES) {
        int sequenceLength = sequence.length;

        Slice alone = wrappedBuffer(sequence);
        assertEquals(lengthOfCodePointSafe(alone, 0), sequenceLength);

        Slice afterFiller = wrappedBuffer(concat(new byte[] {'x'}, sequence));
        assertEquals(lengthOfCodePointSafe(afterFiller, 1), sequenceLength);

        Slice beforeFiller = wrappedBuffer(concat(sequence, new byte[] {'x'}));
        assertEquals(lengthOfCodePointSafe(beforeFiller, 0), sequenceLength);
    }
}
continue; Slice value = codePointToUtf8(codePoint); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), value); continue; Slice value = concatSlice(prefix, codePointToUtf8(codePoint)); if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) { assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), value);
/**
 * Exercises the code point length helpers.
 *
 * <p>First checks {@code lengthOfCodePointFromStartByte} against the four start-byte
 * constants. Then, for every entry of {@code ALL_CODE_POINTS}, encodes the code point with
 * the JDK and checks that the length, decode and encode helpers agree with that encoding.
 * Finally, for every entry of {@code INVALID_SEQUENCES}, checks that
 * {@code lengthOfCodePointSafe} reports the sequence's own length when the sequence stands
 * alone, follows an {@code 'x'} byte, or is followed by an {@code 'x'} byte.
 */
@Test
public void testLengthOfCodePoint()
{
    assertEquals(lengthOfCodePointFromStartByte(START_1_BYTE), 1);
    assertEquals(lengthOfCodePointFromStartByte(START_2_BYTE), 2);
    assertEquals(lengthOfCodePointFromStartByte(START_3_BYTE), 3);
    assertEquals(lengthOfCodePointFromStartByte(START_4_BYTE), 4);

    for (int codePoint : ALL_CODE_POINTS) {
        String text = new String(new int[] {codePoint}, 0, 1);
        assertEquals(text.codePoints().count(), 1);

        // The JDK's UTF-8 encoding is the reference for every helper below
        Slice reference = wrappedBuffer(text.getBytes(UTF_8));
        int referenceLength = reference.length();

        assertEquals(lengthOfCodePoint(codePoint), referenceLength);
        assertEquals(lengthOfCodePoint(reference, 0), referenceLength);
        assertEquals(lengthOfCodePointSafe(reference, 0), referenceLength);
        assertEquals(lengthOfCodePointFromStartByte(reference.getByte(0)), referenceLength);

        assertEquals(getCodePointAt(reference, 0), codePoint);
        assertEquals(getCodePointBefore(reference, referenceLength), codePoint);

        assertEquals(codePointToUtf8(codePoint), reference);
    }

    for (byte[] sequence : INVALID_SEQUENCES) {
        int sequenceLength = sequence.length;

        Slice alone = wrappedBuffer(sequence);
        assertEquals(lengthOfCodePointSafe(alone, 0), sequenceLength);

        Slice afterFiller = wrappedBuffer(concat(new byte[] {'x'}, sequence));
        assertEquals(lengthOfCodePointSafe(afterFiller, 1), sequenceLength);

        Slice beforeFiller = wrappedBuffer(concat(sequence, new byte[] {'x'}));
        assertEquals(lengthOfCodePointSafe(beforeFiller, 0), sequenceLength);
    }
}
private static void testMaxStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(maxStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(maxStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, wrappedBuffer((byte) 0xFF))); } else { // truncate in suffix (if at all) assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, maxStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
private static void testMaxStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(maxStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(maxStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, wrappedBuffer((byte) 0xFF))); } else { // truncate in suffix (if at all) assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, maxStringTruncateToValidRange(suffix, ORIGINAL))); } } } }
private static void testMaxStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix) { for (int testCodePoint : TEST_CODE_POINTS) { Slice codePoint = codePointToUtf8(testCodePoint); Slice value = concatSlice(prefix, codePoint, suffix); assertEquals(maxStringTruncateToValidRange(value, ORC_HIVE_8732), value); // For ORIGINAL, skip prefixes that truncate if (prefix.equals(maxStringTruncateToValidRange(prefix, ORIGINAL))) { if (testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT) { // truncate at test code point assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, wrappedBuffer((byte) 0xFF))); } else { // truncate in suffix (if at all) assertEquals(maxStringTruncateToValidRange(value, ORIGINAL), concatSlice(prefix, codePoint, maxStringTruncateToValidRange(suffix, ORIGINAL))); } } } }