io.airlift.slice.SliceUtf8 Java code examples

Refine search

    INVALID_FUNCTION_ARGUMENT,
    "Target length must be in the range [0.." + Integer.MAX_VALUE + "]");
checkCondition(padString.length() > 0, INVALID_FUNCTION_ARGUMENT, "Padding string must not be empty");
int textLength = countCodePoints(text);
int resultLength = (int) targetLength;
  return SliceUtf8.substring(text, 0, resultLength);
int padStringLength = countCodePoints(padString);
int[] padStringCounts = new int[padStringLength];
for (int i = 0; i < padStringLength; ++i) {
  padStringCounts[i] = lengthOfCodePointSafe(padString, offsetOfCodePoint(padString, i));
int bufferSize = text.length();
for (int i = 0; i < resultLength - textLength; ++i) {
  bufferSize += padStringCounts[i % padStringLength];
Slice buffer = Slices.allocate(bufferSize);
int countBytes = bufferSize - text.length();
int startPointOfExistingText = (paddingOffset + countBytes) % bufferSize;
buffer.setBytes(startPointOfExistingText, text);

/**
 * Counts the code points in {@code slice}, walking it from the first byte to the last.
 *
 * @param slice the bytes to scan
 * @return the number of code points encountered
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} as soon as
 *     {@code tryGetCodePointAt} reports a negative value for a position
 */
private static int safeCountCodePoints(Slice slice)
{
  int total = 0;
  int offset = 0;
  while (offset < slice.length()) {
    int decoded = tryGetCodePointAt(slice, offset);
    if (decoded < 0) {
      throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
    }
    total++;
    // advance by the encoded width of the code point that was just read
    offset += lengthOfCodePoint(decoded);
  }
  return total;
}

@Test
public void testGetMinSlice()
  Slice minSlice = utf8Slice("");
      continue;
    Slice value = codePointToUtf8(codePoint);
    if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) {
      assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value);
      assertEquals(minStringTruncateToValidRange(value, ORIGINAL), minSlice);
  Slice prefix = utf8Slice("apple");
  for (int codePoint = startCodePoint; codePoint < endCodePoint; codePoint++) {
    if (MIN_SURROGATE <= codePoint && codePoint <= MAX_SURROGATE) {
      continue;
    Slice value = concatSlice(prefix, codePointToUtf8(codePoint));
    if (findStringStatisticTruncationPositionForOriginalOrcWriter(value) == value.length()) {
      assertEquals(minStringTruncateToValidRange(value, ORIGINAL), value);

/**
 * Collects every code point of {@code slice}, in order, into an immutable list.
 *
 * @param slice the bytes to decode
 * @return the decoded code points, boxed, in slice order
 */
private static List<Integer> toCodePoints(Slice slice)
{
  ImmutableList.Builder<Integer> collected = ImmutableList.builder();
  int position = 0;
  while (position < slice.length()) {
    collected.add(getCodePointAt(slice, position));
    // step over the sequence that starts at the current position
    position += lengthOfCodePoint(slice, position);
  }
  return collected.build();
}

/**
 * Decodes {@code slice} into an array of code points.
 *
 * <p>The array is sized with {@code safeCountCodePoints(slice)} before any
 * code point is decoded, so that call runs first.
 *
 * @param slice the bytes to decode
 * @return one array element per code point, in slice order
 */
private static int[] castToCodePoints(Slice slice)
{
  int count = safeCountCodePoints(slice);
  int[] decoded = new int[count];
  int offset = 0;
  int index = 0;
  while (index < count) {
    decoded[index] = getCodePointAt(slice, offset);
    offset += lengthOfCodePoint(slice, offset);
    index++;
  }
  return decoded;
}

@Test
public void testToStringStatistics()
  assertNull(DwrfMetadataReader.toStringStatistics(
      HiveWriterVersion.ORIGINAL,
      DwrfProto.StringStatistics.newBuilder()
    assertEquals(
        DwrfMetadataReader.toStringStatistics(
            HiveWriterVersion.ORC_HIVE_8732,
  assertEquals(
      DwrfMetadataReader.toStringStatistics(
          HiveWriterVersion.ORIGINAL,
              .build(),
          true),
      new StringStatistics(null, Slices.utf8Slice("cat"), 0));
              .build(),
          true),
      new StringStatistics(Slices.utf8Slice("ant"), Slices.utf8Slice("cat"), 79));
      Slice codePoint = codePointToUtf8(testCodePoint);
      for (Slice suffix : ALL_UTF8_SEQUENCES) {
        Slice testValue = concatSlice(prefix, codePoint, suffix);

/**
 * Reading a code point at index 1 of the buffer {@code 'x', START_3_BYTE, CONTINUATION_BYTE}
 * must fail with the "UTF-8 sequence truncated" error.
 */
@Test(expectedExceptions = InvalidUtf8Exception.class, expectedExceptionsMessageRegExp = "UTF-8 sequence truncated")
public void testCodePointAtTruncated3()
{
  // index of START_3_BYTE, which sits right after the leading 'x'
  int startBytePosition = 1;
  getCodePointAt(
      wrappedBuffer((byte) 'x', START_3_BYTE, CONTINUATION_BYTE),
      startBytePosition);
}

/**
 * Checks the code point length helpers against each other and against the JDK's
 * UTF-8 encoder, then checks {@code lengthOfCodePointSafe} on the invalid sequences.
 */
@Test
public void testLengthOfCodePoint()
{
  // the start-byte constants map to sequence lengths 1 through 4
  assertEquals(lengthOfCodePointFromStartByte(START_1_BYTE), 1);
  assertEquals(lengthOfCodePointFromStartByte(START_2_BYTE), 2);
  assertEquals(lengthOfCodePointFromStartByte(START_3_BYTE), 3);
  assertEquals(lengthOfCodePointFromStartByte(START_4_BYTE), 4);

  for (int codePoint : ALL_CODE_POINTS) {
    // encode the single code point with the JDK and use that as the reference
    String single = new String(new int[] {codePoint}, 0, 1);
    assertEquals(single.codePoints().count(), 1);
    Slice encoded = wrappedBuffer(single.getBytes(UTF_8));
    int encodedLength = encoded.length();

    assertEquals(lengthOfCodePoint(codePoint), encodedLength);
    assertEquals(lengthOfCodePoint(encoded, 0), encodedLength);
    assertEquals(lengthOfCodePointSafe(encoded, 0), encodedLength);
    assertEquals(lengthOfCodePointFromStartByte(encoded.getByte(0)), encodedLength);

    assertEquals(getCodePointAt(encoded, 0), codePoint);
    assertEquals(getCodePointBefore(encoded, encodedLength), codePoint);

    assertEquals(codePointToUtf8(codePoint), encoded);
  }

  for (byte[] invalid : INVALID_SEQUENCES) {
    int invalidLength = invalid.length;
    // alone, preceded by 'x', and followed by 'x'
    assertEquals(lengthOfCodePointSafe(wrappedBuffer(invalid), 0), invalidLength);
    assertEquals(lengthOfCodePointSafe(wrappedBuffer(concat(new byte[] {'x'}, invalid)), 1), invalidLength);
    assertEquals(lengthOfCodePointSafe(wrappedBuffer(concat(invalid, new byte[] {'x'})), 0), invalidLength);
  }
}

/**
 * Test invalid UTF-8 encodings. A 'correct' result is not expected, only a non-harmful one.
 */
@Test
public void testInvalidUtf8()
{
  assertEquals(countCodePoints(wrappedBuffer(INVALID_UTF8_1)), 0);
  assertEquals(countCodePoints(wrappedBuffer(INVALID_UTF8_2)), 3);

  // expected byte offset for each code point index; -1 is asserted for the last index tried
  int[] expectedOffsetsFirst = {0, -1};
  for (int codePointIndex = 0; codePointIndex < expectedOffsetsFirst.length; codePointIndex++) {
    assertEquals(offsetOfCodePoint(wrappedBuffer(INVALID_UTF8_1), codePointIndex), expectedOffsetsFirst[codePointIndex]);
  }

  int[] expectedOffsetsSecond = {0, 2, 3, -1};
  for (int codePointIndex = 0; codePointIndex < expectedOffsetsSecond.length; codePointIndex++) {
    assertEquals(offsetOfCodePoint(wrappedBuffer(INVALID_UTF8_2), codePointIndex), expectedOffsetsSecond[codePointIndex]);
  }
}

/**
 * For every test code point, builds {@code prefix + codePoint + suffix} and checks
 * {@code maxStringTruncateToValidRange} under both writer versions.
 *
 * <p>With {@code ORC_HIVE_8732} the value must come back unchanged. With {@code ORIGINAL}
 * the check only runs when the prefix alone survives truncation unchanged.
 *
 * @param prefix bytes placed before the test code point
 * @param suffix bytes placed after the test code point
 */
private static void testMaxStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix)
{
  for (int testCodePoint : TEST_CODE_POINTS) {
    Slice encoded = codePointToUtf8(testCodePoint);
    Slice value = concatSlice(prefix, encoded, suffix);
    assertEquals(maxStringTruncateToValidRange(value, ORC_HIVE_8732), value);

    // For ORIGINAL, skip prefixes that truncate
    if (!prefix.equals(maxStringTruncateToValidRange(prefix, ORIGINAL))) {
      continue;
    }

    boolean truncatesAtTestCodePoint =
        testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT;
    Slice actual = maxStringTruncateToValidRange(value, ORIGINAL);
    Slice expected;
    if (truncatesAtTestCodePoint) {
      // truncate at test code point: the prefix followed by a single 0xFF byte
      expected = concatSlice(prefix, wrappedBuffer((byte) 0xFF));
    }
    else {
      // truncate in suffix (if at all)
      expected = concatSlice(prefix, encoded, maxStringTruncateToValidRange(suffix, ORIGINAL));
    }
    assertEquals(actual, expected);
  }
}

/**
 * {@code lengthOfCodePoint(-1)} must fail with "Invalid code point 0xFFFFFFFF".
 */
@Test(expectedExceptions = InvalidCodePointException.class, expectedExceptionsMessageRegExp = "Invalid code point 0xFFFFFFFF")
public void testLengthOfNegativeCodePoint()
{
  int negativeCodePoint = -1;
  lengthOfCodePoint(negativeCodePoint);
}

/**
 * Asserts that {@code reverse} on the UTF-8 bytes of {@code string} equals the UTF-8
 * bytes of the string whose code points are in the opposite order.
 *
 * @param string the text to reverse
 */
private static void assertReverse(String string)
{
  Slice actual = reverse(utf8Slice(string));

  // build the expected value by flipping the code point order by hand
  int[] forward = string.codePoints().toArray();
  int[] backward = new int[forward.length];
  for (int i = 0; i < forward.length; i++) {
    backward[i] = forward[forward.length - 1 - i];
  }
  String reversedText = new String(backward, 0, backward.length);
  Slice expected = wrappedBuffer(reversedText.getBytes(UTF_8));

  assertEquals(actual, expected);
}

/**
 * Asserts that {@code reverse} keeps {@code invalidSequence} byte-for-byte intact while
 * reversing the ASCII text around it: alone, after "abc", before "xyz", and between both.
 *
 * @param invalidSequence the bytes expected to come through {@code reverse} unchanged
 */
private static void assertReverseWithInvalidSequence(byte[] invalidSequence)
{
  byte[] abc = {'a', 'b', 'c'};
  byte[] cba = {'c', 'b', 'a'};
  byte[] xyz = {'x', 'y', 'z'};
  byte[] zyx = {'z', 'y', 'x'};

  // the sequence on its own
  assertEquals(reverse(wrappedBuffer(invalidSequence)), wrappedBuffer(invalidSequence));

  // the sequence at the end of the input moves to the front
  assertEquals(
      reverse(wrappedBuffer(concat(abc, invalidSequence))),
      wrappedBuffer(concat(invalidSequence, cba)));

  // the sequence at the front of the input moves to the end
  assertEquals(
      reverse(wrappedBuffer(concat(invalidSequence, xyz))),
      wrappedBuffer(concat(zyx, invalidSequence)));

  // the sequence in the middle stays in the middle
  assertEquals(
      reverse(wrappedBuffer(concat(abc, invalidSequence, xyz))),
      wrappedBuffer(concat(zyx, invalidSequence, cba)));
}

/**
 * Asserts that {@code fixInvalidUtf8(testSlice)} equals {@code expectedSlice}.
 *
 * @param testSlice the input passed to {@code fixInvalidUtf8}
 * @param expectedSlice the result that call must produce
 */
private static void assertFixInvalidUtf8(Slice testSlice, Slice expectedSlice)
{
  Slice fixed = fixInvalidUtf8(testSlice);
  assertEquals(fixed, expectedSlice);
}

/**
 * Asserts that {@code countCodePoints} on the UTF-8 slice of {@code string} matches
 * the count reported by {@link String#codePoints()}.
 *
 * @param string the text whose code points are counted both ways
 */
private static void assertCodePointCount(String string)
{
  int sliceCount = countCodePoints(utf8Slice(string));
  long jdkCount = string.codePoints().count();
  assertEquals(sliceCount, jdkCount);
}

/**
 * For every test code point, builds {@code prefix + codePoint + suffix} and checks
 * {@code minStringTruncateToValidRange} under both writer versions.
 *
 * <p>With {@code ORC_HIVE_8732} the value must come back unchanged. With {@code ORIGINAL}
 * the check only runs when the prefix alone survives truncation unchanged.
 *
 * @param prefix bytes placed before the test code point
 * @param suffix bytes placed after the test code point
 */
private static void testMinStringTruncateAtFirstReplacementCharacter(Slice prefix, Slice suffix)
{
  for (int testCodePoint : TEST_CODE_POINTS) {
    Slice encoded = codePointToUtf8(testCodePoint);
    Slice value = concatSlice(prefix, encoded, suffix);
    assertEquals(minStringTruncateToValidRange(value, ORC_HIVE_8732), value);

    // For ORIGINAL, skip prefixes that truncate
    if (!prefix.equals(minStringTruncateToValidRange(prefix, ORIGINAL))) {
      continue;
    }

    boolean truncatesAtTestCodePoint =
        testCodePoint == REPLACEMENT_CHARACTER_CODE_POINT || testCodePoint >= MIN_SUPPLEMENTARY_CODE_POINT;
    Slice actual = minStringTruncateToValidRange(value, ORIGINAL);
    Slice expected;
    if (truncatesAtTestCodePoint) {
      // truncate at test code point: only the prefix remains
      expected = prefix;
    }
    else {
      // truncate in suffix (if at all)
      expected = concatSlice(prefix, encoded, minStringTruncateToValidRange(suffix, ORIGINAL));
    }
    assertEquals(actual, expected);
  }
}

/**
 * Converts a partition value to a UTF-8 slice, rejecting values with more code points
 * than the varchar column's length.
 *
 * @param value the partition value text
 * @param name the partition key name, used only in the error message
 * @param columnType the column type; it is cast to {@code VarcharType}
 * @return {@code value} encoded as a UTF-8 slice
 * @throws PrestoException with {@code HIVE_INVALID_PARTITION_VALUE} when the code point
 *     count of {@code value} exceeds the varchar length
 */
public static Slice varcharPartitionKey(String value, String name, Type columnType)
{
  Slice encoded = Slices.utf8Slice(value);
  VarcharType varchar = (VarcharType) columnType;
  int codePointCount = SliceUtf8.countCodePoints(encoded);
  if (codePointCount <= varchar.getLength()) {
    return encoded;
  }
  throw new PrestoException(HIVE_INVALID_PARTITION_VALUE, format("Invalid partition value '%s' for %s partition key: %s", value, columnType.toString(), name));
}

if (search.length() == 0) {
  Slice buffer = Slices.allocate((countCodePoints(str) + 1) * replace.length() + str.length());
  buffer.setBytes(0, replace);
  while (index < str.length()) {
    int codePointLength = lengthOfCodePointSafe(str, index);
    buffer.setBytes(indexBuffer, str, index, codePointLength);
Slice buffer = Slices.allocate(str.length());
    int bytesToCopy = str.length() - index;
    buffer = Slices.ensureSize(buffer, indexBuffer + bytesToCopy);
    buffer.setBytes(indexBuffer, str, index, bytesToCopy);
    indexBuffer += bytesToCopy;

if (delimiter.length() == 0) {
  int startCodePoint = toIntExact(index);
  int indexStart = offsetOfCodePoint(string, startCodePoint - 1);
  if (indexStart < 0) {
  int length = lengthOfCodePoint(string, indexStart);
  if (indexStart + length > string.length()) {
    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding");
  return string.slice(indexStart, length);

/**
 * Right-pads {@code slice} with ASCII spaces until it holds {@code length} code points.
 *
 * @param slice the UTF-8 text to pad
 * @param length the target length, measured in code points
 * @return {@code slice} itself when it already has exactly {@code length} code points,
 *     otherwise a newly allocated slice holding the text followed by spaces
 * @throws IllegalArgumentException if {@code slice} has more than {@code length} code points
 */
public static Slice padSpaces(Slice slice, int length)
{
  int codePointCount = countCodePoints(slice);

  // a longer input is rejected, not truncated
  if (codePointCount > length) {
    throw new IllegalArgumentException("pad length is smaller than slice length");
  }
  // already at the target length: hand back the input as-is
  if (codePointCount == length) {
    return slice;
  }

  // one extra byte for each missing code point, since every pad character is a single space byte
  int paddedSize = slice.length() + length - codePointCount;
  Slice padded = Slices.allocate(paddedSize);

  // copy the existing text to the front
  padded.setBytes(0, slice);

  // write spaces over the remaining tail
  int position = slice.length();
  while (position < paddedSize) {
    padded.setByte(position, ' ');
    position++;
  }
  return padded;
}

Javadoc

Utility methods for UTF-8 encoded slices.

Most used methods

countCodePoints
Counts the code points within UTF-8 encoded slice up to length. Note: This method does not explicitl
offsetOfCodePoint
Starting from position bytes in utf8, finds the index of the first byte of the code point codePointC
lengthOfCodePoint
Gets the UTF-8 sequence length of the code point at position. Note: This method does not explicitly
codePointToUtf8
Convert the code point to UTF-8.
fixInvalidUtf8
getCodePointAt
Gets the UTF-8 encoded code point at the position. Note: This method does not explicitly check for v
lengthOfCodePointSafe
Gets the UTF-8 sequence length of the code point at position. Truncated UTF-8 sequences, 5 and 6 byt
tryGetCodePointAt
Tries to get the UTF-8 encoded code point at the position. A positive return value means the UTF-8 s
leftTrim
Removes all whiteSpaceCodePoints from the left side of the string. Note: Invalid UTF-8 sequences are
reverse
Reverses the slice code point by code point. Note: Invalid UTF-8 sequences are copied directly to th
rightTrim
Removes all whiteSpaceCodePoints from the right side of the string. Note: Invalid UTF-8 sequen
setCodePointAt
Sets the UTF-8 sequence for code point at the position.

Popular in Java

Making http requests using okhttp
findViewById (Activity)
getContentResolver (Context)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
HttpURLConnection (java.net)
A URLConnection for HTTP (RFC 2616 [http://tools.ietf.org/html/rfc2616]) used to send and receive d
SimpleDateFormat (java.text)
Formats and parses dates in a locale-sensitive manner. Formatting turns a Date into a String, and pa
Map (java.util)
A Map is a data structure consisting of a set of keys and values in which each key is mapped to a si
Servlet (javax.servlet)
Defines methods that all servlets must implement. A servlet is a small Java program that runs within
Project (org.apache.tools.ant)
Central representation of an Ant project. This class defines an Ant project with all of its targets,
Reflections (org.reflections)
Reflections one-stop-shop object. Reflections scans your classpath, indexes the metadata, allows you t
Top Sublime Text plugins

How to use SliceUtf8 in io.airlift.slice

Best Java code snippets using io.airlift.slice.SliceUtf8 (Showing top 20 results out of 315)

Refine search

How to use
SliceUtf8
in
io.airlift.slice