/**
 * Encodes a raw partition value as the UTF-8 slice for a VARCHAR partition key.
 *
 * @param value raw partition value
 * @param name partition key name, used only in the error message
 * @param columnType type of the partition column; cast to {@code VarcharType}
 * @return the UTF-8 encoded partition value
 * @throws PrestoException with {@code HIVE_INVALID_PARTITION_VALUE} when the value has more
 *         code points than the column type's length
 */
public static Slice varcharPartitionKey(String value, String name, Type columnType) {
    Slice encoded = Slices.utf8Slice(value);
    VarcharType declaredType = (VarcharType) columnType;

    // Length is measured in code points, not bytes.
    int codePointCount = SliceUtf8.countCodePoints(encoded);
    if (codePointCount <= declaredType.getLength()) {
        return encoded;
    }
    throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, format("Invalid partition value '%s' for %s partition key: %s", value, columnType.toString(), name));
}
/**
 * Encodes a raw partition value as the UTF-8 slice for a CHAR partition key.
 * The encoded value is passed through {@code trimTrailingSpaces} before its length is checked.
 *
 * @param value raw partition value
 * @param name partition key name, used only in the error message
 * @param columnType type of the partition column; cast to {@code CharType}
 * @return the trimmed, UTF-8 encoded partition value
 * @throws PrestoException with {@code HIVE_INVALID_PARTITION_VALUE} when the trimmed value has
 *         more code points than the column type's length
 */
public static Slice charPartitionKey(String value, String name, Type columnType) {
    Slice trimmed = trimTrailingSpaces(Slices.utf8Slice(value));
    CharType declaredType = (CharType) columnType;

    // Length is measured in code points, not bytes.
    int codePointCount = SliceUtf8.countCodePoints(trimmed);
    if (codePointCount <= declaredType.getLength()) {
        return trimmed;
    }
    throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, format("Invalid partition value '%s' for %s partition key: %s", value, columnType.toString(), name));
}
/**
 * Types a string literal as a varchar whose length is the literal's code point count,
 * and records that type for the node via {@code setExpressionType}.
 */
@Override
protected Type visitStringLiteral(StringLiteral node, StackableAstVisitorContext<Context> context) {
    int literalLength = SliceUtf8.countCodePoints(node.getSlice());
    VarcharType literalType = VarcharType.createVarcharType(literalLength);
    return setExpressionType(node, literalType);
}
/**
 * Translates a string literal into a constant expression typed as a varchar whose
 * length is the literal's code point count.
 */
@Override
protected RowExpression visitStringLiteral(StringLiteral node, Void context) {
    int literalLength = countCodePoints(node.getSlice());
    return constant(node.getSlice(), createVarcharType(literalLength));
}
public static Slice padSpaces(Slice slice, int length) { int textLength = countCodePoints(slice); // if our string is bigger than requested then truncate if (textLength > length) { throw new IllegalArgumentException("pad length is smaller than slice length"); } // if our target length is the same as our string then return our string if (textLength == length) { return slice; } // preallocate the result int bufferSize = slice.length() + length - textLength; Slice buffer = Slices.allocate(bufferSize); // fill in the existing string buffer.setBytes(0, slice); // fill padding spaces for (int i = slice.length(); i < bufferSize; ++i) { buffer.setByte(i, ' '); } return buffer; }
/**
 * SQL scalar function: returns the number of code points in the given varchar value.
 *
 * @param slice UTF-8 encoded string
 * @return code point count, widened to {@code long} for the BIGINT return type
 */
@Description("count of code points of the given string")
@ScalarFunction
@LiteralParameters("x")
@SqlType(StandardTypes.BIGINT)
public static long length(@SqlType("varchar(x)") Slice slice) {
    long codePointCount = countCodePoints(slice);
    return codePointCount;
}
/**
 * SQL scalar function: returns the Unicode code point of a single-character string.
 * The input is checked via {@code checkCondition} to contain exactly one code point
 * (presumably raising {@code INVALID_FUNCTION_ARGUMENT} otherwise; confirm against that helper).
 *
 * @param slice UTF-8 encoded string expected to hold exactly one code point
 * @return the code point at byte offset 0
 */
@Description("returns Unicode code point of a single character string")
@ScalarFunction("codepoint")
@SqlType(StandardTypes.INTEGER)
public static long codepoint(@SqlType("varchar(1)") Slice slice) {
    boolean singleCharacter = countCodePoints(slice) == 1;
    checkCondition(singleCharacter, INVALID_FUNCTION_ARGUMENT, "Input string must be a single character string");
    return getCodePointAt(slice, 0);
}
/**
 * SQL scalar function: decodes a varbinary value as UTF-8, delegating the handling of
 * invalid sequences to {@code SliceUtf8.fixInvalidUtf8}.
 *
 * @param slice bytes to decode
 * @param replacementCharacter empty string or a single-character string; when a single
 *        character is given its code point is passed on as the replacement, otherwise an
 *        empty {@code OptionalInt} is passed
 * @return the result of {@code SliceUtf8.fixInvalidUtf8}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} when the replacement string
 *         has more than one code point, or when its code point cannot be decoded
 */
@Description("decodes the UTF-8 encoded string")
@ScalarFunction
@LiteralParameters("x")
@SqlType(StandardTypes.VARCHAR)
public static Slice fromUtf8(@SqlType(StandardTypes.VARBINARY) Slice slice, @SqlType("varchar(x)") Slice replacementCharacter) {
    int count = countCodePoints(replacementCharacter);
    if (count > 1) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Replacement character string must empty or a single character");
    }

    OptionalInt replacementCodePoint;
    if (count == 1) {
        try {
            replacementCodePoint = OptionalInt.of(getCodePointAt(replacementCharacter, 0));
        } catch (InvalidUtf8Exception e) {
            // Keep the decoding failure as the cause so the root problem is not lost.
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid replacement character", e);
        }
    } else {
        // Empty replacement string: no replacement code point is supplied.
        replacementCodePoint = OptionalInt.empty();
    }
    return SliceUtf8.fixInvalidUtf8(slice, replacementCodePoint);
}
/**
 * SQL scalar function {@code strpos}: returns the 1-based code point position of the first
 * occurrence of {@code substring} within {@code string}.
 *
 * @param string UTF-8 text to search
 * @param substring UTF-8 text to look for
 * @return 1 when {@code substring} is empty, 0 when it does not occur, otherwise the
 *         1-based position counted in code points
 */
@Description("returns index of first occurrence of a substring (or 0 if not found)")
@ScalarFunction("strpos")
@LiteralParameters({"x", "y"})
@SqlType(StandardTypes.BIGINT)
public static long stringPosition(@SqlType("varchar(x)") Slice string, @SqlType("varchar(y)") Slice substring) {
    boolean emptyNeedle = substring.length() == 0;
    if (emptyNeedle) {
        return 1;
    }

    // indexOf reports a byte offset; convert it to a code point position.
    int byteOffset = string.indexOf(substring);
    return (byteOffset < 0) ? 0 : countCodePoints(string, 0, byteOffset) + 1;
}
StringLiteral stringLiteral = new StringLiteral(value.toStringUtf8()); if (!varcharType.isUnbounded() && varcharType.getLengthSafe() == SliceUtf8.countCodePoints(value)) { return stringLiteral;
checkCondition(padString.length() > 0, INVALID_FUNCTION_ARGUMENT, "Padding string must not be empty"); int textLength = countCodePoints(text); int resultLength = (int) targetLength; int padStringLength = countCodePoints(padString); int[] padStringCounts = new int[padStringLength]; for (int i = 0; i < padStringLength; ++i) {
@Description("suffix starting at given index") @ScalarFunction @LiteralParameters("x") @SqlType("varchar(x)") public static Slice substr(@SqlType("varchar(x)") Slice utf8, @SqlType(StandardTypes.BIGINT) long start) { if ((start == 0) || utf8.length() == 0) { return Slices.EMPTY_SLICE; } int startCodePoint = Ints.saturatedCast(start); if (startCodePoint > 0) { int indexStart = offsetOfCodePoint(utf8, startCodePoint - 1); if (indexStart < 0) { // before beginning of string return Slices.EMPTY_SLICE; } int indexEnd = utf8.length(); return utf8.slice(indexStart, indexEnd - indexStart); } // negative start is relative to end of string int codePoints = countCodePoints(utf8); startCodePoint += codePoints; // before beginning of string if (startCodePoint < 0) { return Slices.EMPTY_SLICE; } int indexStart = offsetOfCodePoint(utf8, startCodePoint); int indexEnd = utf8.length(); return utf8.slice(indexStart, indexEnd - indexStart); }
// Shift the start position by the number of code points in utf8.
// NOTE(review): presumably this turns a negative, end-relative start into an absolute
// code point index; confirm against the enclosing method, which is not visible here.
int codePoints = countCodePoints(utf8);
startCodePoint += codePoints;
// Allocate room for (code points in str + 1) copies of replace plus every byte of str.
// NOTE(review): presumably sized for placing `replace` before each code point and once at
// the end; the size is computed in int arithmetic, so very large inputs could overflow.
// Confirm against the enclosing method, which is not visible here.
Slice buffer = Slices.allocate((countCodePoints(str) + 1) * replace.length() + str.length());
/**
 * Counts the code points in the whole of a UTF-8 encoded slice by delegating to the
 * ranged overload over bytes {@code [0, utf8.length())}.
 * <p>
 * Note: This method does not explicitly check for valid UTF-8, and may
 * return incorrect results or throw an exception for invalid UTF-8.
 *
 * @param utf8 UTF-8 encoded slice
 * @return number of code points in {@code utf8}
 */
public static int countCodePoints(Slice utf8) {
    int byteLength = utf8.length();
    return countCodePoints(utf8, 0, byteLength);
}
/**
 * Types a string literal as a varchar whose length is the literal's code point count,
 * records that type in {@code expressionTypes}, and returns it.
 */
@Override
protected Type visitStringLiteral(StringLiteral node, StackableAstVisitorContext<AnalysisContext> context) {
    int literalLength = SliceUtf8.countCodePoints(node.getSlice());
    VarcharType literalType = VarcharType.createVarcharType(literalLength);
    expressionTypes.put(node, literalType);
    return literalType;
}
/**
 * SQL scalar function: returns how many code points the given varchar value contains.
 *
 * @param slice UTF-8 encoded string
 * @return code point count as a {@code long}, matching the BIGINT return type
 */
@Description("count of code points of the given string")
@ScalarFunction
@LiteralParameters("x")
@SqlType(StandardTypes.BIGINT)
public static long length(@SqlType("varchar(x)") Slice slice) {
    final long total = countCodePoints(slice);
    return total;
}
/**
 * SQL scalar function: length of a varchar value measured in code points.
 *
 * @param slice UTF-8 encoded string
 * @return the code point count reported by {@code countCodePoints}, as a {@code long}
 */
@Description("count of code points of the given string")
@ScalarFunction
@LiteralParameters("x")
@SqlType(StandardTypes.BIGINT)
public static long length(@SqlType("varchar(x)") Slice slice) {
    long result;
    result = countCodePoints(slice);
    return result;
}
/**
 * Benchmarks {@code countCodePoints} on the benchmark slice and sanity-checks the result
 * against the length reported by the benchmark data.
 *
 * @param data benchmark state supplying the slice and its expected length
 * @return the counted code points
 * @throws AssertionError when the count does not equal {@code data.getLength()}
 */
@Benchmark
public int benchmarkCountCodePoints(BenchmarkData data) {
    int counted = countCodePoints(data.getSlice());
    if (counted == data.getLength()) {
        return counted;
    }
    throw new AssertionError();
}
/**
 * SQL scalar function: returns the Unicode code point of a string that holds exactly one
 * code point. The single-character requirement is enforced through {@code checkCondition}
 * (presumably raising {@code INVALID_FUNCTION_ARGUMENT} on failure; confirm against that helper).
 *
 * @param slice UTF-8 encoded string expected to hold exactly one code point
 * @return the code point read at byte offset 0
 */
@Description("returns Unicode code point of a single character string")
@ScalarFunction("codepoint")
@SqlType(StandardTypes.INTEGER)
public static long codepoint(@SqlType("varchar(1)") Slice slice) {
    int codePointCount = countCodePoints(slice);
    checkCondition(codePointCount == 1, INVALID_FUNCTION_ARGUMENT, "Input string must be a single character string");

    int codePoint = getCodePointAt(slice, 0);
    return codePoint;
}