int codePoint = tryGetCodePointAt(utf8, position);
/**
 * Counts the code points in {@code slice}, refusing input that is not well-formed UTF-8.
 *
 * @param slice the UTF-8 bytes to scan from the first byte to the last
 * @return the number of code points encountered
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} as soon as
 *         {@code tryGetCodePointAt} yields a negative value for some position
 */
private static int safeCountCodePoints(Slice slice)
{
    int total = 0;
    int offset = 0;
    while (offset < slice.length()) {
        int current = tryGetCodePointAt(slice, offset);
        if (current < 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
        }
        total++;
        // step over every byte of the code point that was just counted
        offset += lengthOfCodePoint(current);
    }
    return total;
}
@Description("computes Hamming distance between two strings") @ScalarFunction @LiteralParameters({"x", "y"}) @SqlType(StandardTypes.BIGINT) public static long hammingDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) { int distance = 0; int leftPosition = 0; int rightPosition = 0; while (leftPosition < left.length() && rightPosition < right.length()) { int codePointLeft = tryGetCodePointAt(left, leftPosition); int codePointRight = tryGetCodePointAt(right, rightPosition); // if both code points are invalid, we do not care if they are equal // the following code treats them as equal if they happen to be of the same length if (codePointLeft != codePointRight) { distance++; } leftPosition += codePointLeft > 0 ? lengthOfCodePoint(codePointLeft) : -codePointLeft; rightPosition += codePointRight > 0 ? lengthOfCodePoint(codePointRight) : -codePointRight; } checkCondition( leftPosition == left.length() && rightPosition == right.length(), INVALID_FUNCTION_ARGUMENT, "The input strings to hamming_distance function must have the same length"); return distance; }
/**
 * Finds the byte offset of the first code point in {@code utf8} that is not
 * flagged in {@code WHITESPACE_CODE_POINTS}.
 *
 * <p>The scan also stops at the first position for which
 * {@code tryGetCodePointAt} returns a negative value.
 *
 * @param utf8 the UTF-8 bytes to scan from offset 0
 * @return the offset where the scan stopped, or {@code utf8.length()} when
 *         every code point was flagged as whitespace
 */
private static int firstNonWhitespacePosition(Slice utf8)
{
    final int end = utf8.length();
    int offset = 0;
    while (offset < end) {
        int current = tryGetCodePointAt(utf8, offset);
        // the negative check must come first: it guards the table lookup
        if (current < 0 || !WHITESPACE_CODE_POINTS[current]) {
            return offset;
        }
        offset += lengthOfCodePoint(current);
    }
    return offset;
}
/**
 * Skips the leading run of code points flagged in {@code WHITESPACE_CODE_POINTS}
 * and reports where that run ends.
 *
 * @param utf8 the UTF-8 bytes to examine, starting at offset 0
 * @return the byte offset of the first code point that is not flagged, of the
 *         first position where {@code tryGetCodePointAt} returns a negative
 *         value, or {@code utf8.length()} if neither occurs
 */
private static int firstNonWhitespacePosition(Slice utf8)
{
    int cursor = 0;
    for (int limit = utf8.length(); cursor < limit; ) {
        int value = tryGetCodePointAt(utf8, cursor);
        // short-circuit keeps a negative value from being used as a table index
        boolean whitespace = value >= 0 && WHITESPACE_CODE_POINTS[value];
        if (!whitespace) {
            break;
        }
        cursor += lengthOfCodePoint(value);
    }
    return cursor;
}
/**
 * Finds the byte offset of the first code point in {@code utf8} for which
 * {@code matches(codePoint, codePointsToMatch)} is false.
 *
 * <p>The scan also stops at the first position for which
 * {@code tryGetCodePointAt} returns a negative value.
 *
 * @param utf8 the UTF-8 bytes to scan from offset 0
 * @param codePointsToMatch the code points handed to {@code matches} for each position
 * @return the offset where the scan stopped, or {@code utf8.length()} when
 *         every code point matched
 */
private static int firstNonMatchPosition(Slice utf8, int[] codePointsToMatch)
{
    final int end = utf8.length();
    int offset = 0;
    while (offset < end) {
        int current = tryGetCodePointAt(utf8, offset);
        // a negative value is never passed on to matches()
        if (current < 0 || !matches(current, codePointsToMatch)) {
            return offset;
        }
        offset += lengthOfCodePoint(current);
    }
    return offset;
}
/**
 * Skips the leading run of code points accepted by
 * {@code matches(codePoint, codePointsToMatch)} and reports where that run ends.
 *
 * @param utf8 the UTF-8 bytes to examine, starting at offset 0
 * @param codePointsToMatch the code points forwarded to {@code matches}
 * @return the byte offset of the first code point that does not match, of the
 *         first position where {@code tryGetCodePointAt} returns a negative
 *         value, or {@code utf8.length()} if neither occurs
 */
private static int firstNonMatchPosition(Slice utf8, int[] codePointsToMatch)
{
    int cursor = 0;
    for (int limit = utf8.length(); cursor < limit; ) {
        int value = tryGetCodePointAt(utf8, cursor);
        // short-circuit: matches() is only consulted for non-negative values
        boolean accepted = value >= 0 && matches(value, codePointsToMatch);
        if (!accepted) {
            break;
        }
        cursor += lengthOfCodePoint(value);
    }
    return cursor;
}
/**
 * Returns how many code points {@code slice} holds, failing on malformed UTF-8.
 *
 * @param slice the UTF-8 bytes to walk from start to end
 * @return the count of code points
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} if
 *         {@code tryGetCodePointAt} returns a negative value at any position
 */
private static int safeCountCodePoints(Slice slice)
{
    int count = 0;
    // the counter advances once per loop pass; the cursor moves inside the body
    for (int cursor = 0; cursor < slice.length(); count++) {
        int value = tryGetCodePointAt(slice, cursor);
        if (value < 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
        }
        cursor += lengthOfCodePoint(value);
    }
    return count;
}
/**
 * Tallies the code points of {@code slice} and rejects input containing a
 * position that {@code tryGetCodePointAt} cannot decode.
 *
 * @param slice the UTF-8 bytes to tally
 * @return the number of code points in {@code slice}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when a
 *         negative value comes back from {@code tryGetCodePointAt}
 */
private static int safeCountCodePoints(Slice slice)
{
    final int end = slice.length();
    int seen = 0;
    int index = 0;
    while (index < end) {
        final int decoded = tryGetCodePointAt(slice, index);
        if (decoded >= 0) {
            index += lengthOfCodePoint(decoded);
            seen++;
            continue;
        }
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
    }
    return seen;
}
int leftCodePoint = tryGetCodePointAt(utf8Left, offset); if (leftCodePoint < 0) { throw new InvalidUtf8Exception("Invalid UTF-8 sequence in utf8Left at " + offset); int rightCodePoint = tryGetCodePointAt(utf8Right, offset); if (rightCodePoint < 0) { throw new InvalidUtf8Exception("Invalid UTF-8 sequence in utf8Right at " + offset);
codePoint = tryGetCodePointAt(utf8, position - 2); codePointLength = 2; codePoint = tryGetCodePointAt(utf8, position - 3); codePointLength = 3; codePoint = tryGetCodePointAt(utf8, position - 4); codePointLength = 4;
codePoint = tryGetCodePointAt(utf8, position - 2); codePointLength = 2; codePoint = tryGetCodePointAt(utf8, position - 3); codePointLength = 3; codePoint = tryGetCodePointAt(utf8, position - 4); codePointLength = 4;
codePoint = tryGetCodePointAt(utf8, position - 2); codePointLength = 2; codePoint = tryGetCodePointAt(utf8, position - 3); codePointLength = 3; codePoint = tryGetCodePointAt(utf8, position - 4); codePointLength = 4;
codePoint = tryGetCodePointAt(utf8, position - 2); codePointLength = 2; codePoint = tryGetCodePointAt(utf8, position - 3); codePointLength = 3; codePoint = tryGetCodePointAt(utf8, position - 4); codePointLength = 4;
int utf8Position = 0; while (dataPosition < length) { int codePoint = tryGetCodePointAt(slice, dataPosition); int codePointLength; if (codePoint >= 0) {
int utf8Position = 0; while (dataPosition < length) { int codePoint = tryGetCodePointAt(slice, dataPosition); int codePointLength; if (codePoint >= 0) {
@Description("computes Hamming distance between two strings") @ScalarFunction @LiteralParameters({"x", "y"}) @SqlType(StandardTypes.BIGINT) public static long hammingDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) { int distance = 0; int leftPosition = 0; int rightPosition = 0; while (leftPosition < left.length() && rightPosition < right.length()) { int codePointLeft = tryGetCodePointAt(left, leftPosition); int codePointRight = tryGetCodePointAt(right, rightPosition); // if both code points are invalid, we do not care if they are equal // the following code treats them as equal if they happen to be of the same length if (codePointLeft != codePointRight) { distance++; } leftPosition += codePointLeft > 0 ? lengthOfCodePoint(codePointLeft) : -codePointLeft; rightPosition += codePointRight > 0 ? lengthOfCodePoint(codePointRight) : -codePointRight; } checkCondition( leftPosition == left.length() && rightPosition == right.length(), INVALID_FUNCTION_ARGUMENT, "The input strings to hamming_distance function must have the same length"); return distance; }
@Description("computes Hamming distance between two strings") @ScalarFunction @LiteralParameters({"x", "y"}) @SqlType(StandardTypes.BIGINT) public static long hammingDistance(@SqlType("varchar(x)") Slice left, @SqlType("varchar(y)") Slice right) { int distance = 0; int leftPosition = 0; int rightPosition = 0; while (leftPosition < left.length() && rightPosition < right.length()) { int codePointLeft = tryGetCodePointAt(left, leftPosition); int codePointRight = tryGetCodePointAt(right, rightPosition); // if both code points are invalid, we do not care if they are equal // the following code treats them as equal if they happen to be of the same length if (codePointLeft != codePointRight) { distance++; } leftPosition += codePointLeft > 0 ? lengthOfCodePoint(codePointLeft) : -codePointLeft; rightPosition += codePointRight > 0 ? lengthOfCodePoint(codePointRight) : -codePointRight; } checkCondition( leftPosition == left.length() && rightPosition == right.length(), INVALID_FUNCTION_ARGUMENT, "The input strings to hamming_distance function must have the same length"); return distance; }
int upperPosition = 0; while (position < length) { int codePoint = tryGetCodePointAt(utf8, position); if (codePoint >= 0) { int upperCodePoint = codePointTranslationMap[codePoint];
int upperPosition = 0; while (position < length) { int codePoint = tryGetCodePointAt(utf8, position); if (codePoint >= 0) { int upperCodePoint = codePointTranslationMap[codePoint];