io.airlift.slice.SliceUtf8.offsetOfCodePoint Java code examples

  /**
   * Computes how many bytes of the region {@code [offset, offset + length)} of
   * {@code slice} to keep so that at most {@code maxCharacterCount} code points remain.
   *
   * @param maxCharacterCount maximum number of code points to keep; must not be negative
   * @param slice the UTF-8 data; must not be null
   * @param offset byte index at which the region starts
   * @param length byte length of the region
   * @return the number of bytes to keep, never more than {@code length}
   * @throws NullPointerException if {@code slice} is null
   * @throws IllegalArgumentException if {@code maxCharacterCount} is negative
   */
  private static int calculateTruncationLength(int maxCharacterCount, Slice slice, int offset, int length)
  {
    requireNonNull(slice, "slice is null");
    if (maxCharacterCount < 0) {
      throw new IllegalArgumentException("Max length must be greater or equal than zero");
    }
    // Every code point occupies at least one byte, so a region this short cannot exceed the limit.
    if (length <= maxCharacterCount) {
      return length;
    }

    int indexEnd = offsetOfCodePoint(slice, offset, maxCharacterCount);
    if (indexEnd < 0) {
      // The requested code point position is not within the slice: nothing to cut.
      return length;
    }
    // offsetOfCodePoint is not told where the region ends, so when the slice extends past
    // offset + length it can report a position beyond the region. Never return more than length.
    return Math.min(indexEnd - offset, length);
  }
}

int endIndex = offsetOfCodePoint(slice, offset, codePointCount);
if (endIndex < 0) {

  int indexStart = offsetOfCodePoint(utf8, startCodePoint - 1);
  if (indexStart < 0) {
  int indexEnd = offsetOfCodePoint(utf8, indexStart, lengthCodePoints);
  if (indexEnd < 0) {
int indexStart = offsetOfCodePoint(utf8, startCodePoint);
int indexEnd;
if (startCodePoint + lengthCodePoints < codePoints) {
  indexEnd = offsetOfCodePoint(utf8, indexStart, lengthCodePoints);

/**
 * Returns the suffix of {@code utf8} that begins at the 1-based code point
 * position {@code start}. A negative {@code start} counts back from the end
 * of the string. An empty slice is returned when {@code start} is zero, when
 * the input is empty, or when {@code start} points outside the string.
 */
@Description("suffix starting at given index")
@ScalarFunction
@LiteralParameters("x")
@SqlType("varchar(x)")
public static Slice substr(@SqlType("varchar(x)") Slice utf8, @SqlType(StandardTypes.BIGINT) long start)
{
  if (start == 0 || utf8.length() == 0) {
    return Slices.EMPTY_SLICE;
  }

  int position = Ints.saturatedCast(start);
  int byteStart;
  if (position > 0) {
    // positive positions are 1-based, offsetOfCodePoint is 0-based
    byteStart = offsetOfCodePoint(utf8, position - 1);
    if (byteStart < 0) {
      // requested position is not within the string
      return Slices.EMPTY_SLICE;
    }
  }
  else {
    // negative start is relative to the end of the string
    position += countCodePoints(utf8);
    if (position < 0) {
      // reaches back past the beginning of the string
      return Slices.EMPTY_SLICE;
    }
    byteStart = offsetOfCodePoint(utf8, position);
  }
  return utf8.slice(byteStart, utf8.length() - byteStart);
}

int startCodePoint = toIntExact(index);
int indexStart = offsetOfCodePoint(string, startCodePoint - 1);
if (indexStart < 0) {

int[] padStringCounts = new int[padStringLength];
for (int i = 0; i < padStringLength; ++i) {
  padStringCounts[i] = lengthOfCodePointSafe(padString, offsetOfCodePoint(padString, i));

/**
 * Locates the byte index at which the code point at the given position
 * begins, searching from the start of the slice.
 * <p>
 * Note: the input is not explicitly validated as UTF-8; invalid UTF-8 may
 * produce incorrect results or an exception.
 *
 * @param utf8 the UTF-8 encoded slice to search
 * @param codePointCount the zero-based code point position to locate
 * @return the index of the first byte of that code point, or {@code -1} if
 * the position is not within the slice
 */
public static int offsetOfCodePoint(Slice utf8, int codePointCount)
{
  int searchStart = 0;
  return offsetOfCodePoint(utf8, searchStart, codePointCount);
}

/**
 * Returns the byte index of the first byte of the code point found at
 * position {@code codePointCount}, counted from the beginning of the slice.
 * <p>
 * Note: no explicit UTF-8 validation is performed; for invalid UTF-8 the
 * result may be incorrect or an exception may be thrown.
 *
 * @param utf8 the UTF-8 encoded slice to search
 * @param codePointCount the zero-based code point position to locate
 * @return the byte index of that code point, or {@code -1} if the position
 * is not within the slice
 */
public static int offsetOfCodePoint(Slice utf8, int codePointCount)
{
  int fromByte = 0;
  return offsetOfCodePoint(utf8, fromByte, codePointCount);
}

  /**
   * Determines the byte length to which the region {@code [offset, offset + length)}
   * of {@code slice} must be cut so that it holds at most {@code maxCharacterCount}
   * code points.
   *
   * @param maxCharacterCount maximum number of code points to keep; must not be negative
   * @param slice the UTF-8 data; must not be null
   * @param offset byte index at which the region starts
   * @param length byte length of the region
   * @return the number of bytes to keep, never more than {@code length}
   * @throws NullPointerException if {@code slice} is null
   * @throws IllegalArgumentException if {@code maxCharacterCount} is negative
   */
  private static int calculateTruncationLength(int maxCharacterCount, Slice slice, int offset, int length)
  {
    requireNonNull(slice, "slice is null");
    if (maxCharacterCount < 0) {
      throw new IllegalArgumentException("Max length must be greater or equal than zero");
    }
    // A code point is at least one byte long, so this region already fits within the limit.
    if (length <= maxCharacterCount) {
      return length;
    }

    int indexEnd = offsetOfCodePoint(slice, offset, maxCharacterCount);
    if (indexEnd < 0) {
      // The requested code point position is not within the slice: keep everything.
      return length;
    }
    // The lookup has no knowledge of the region's end; if the slice continues past
    // offset + length the position found may lie outside the region, so cap the result.
    return Math.min(indexEnd - offset, length);
  }
}

  /**
   * Returns the number of leading bytes of the region {@code [offset, offset + length)}
   * of {@code slice} that contain at most {@code maxCharacterCount} code points.
   *
   * @param maxCharacterCount maximum number of code points to keep; must not be negative
   * @param slice the UTF-8 data; must not be null
   * @param offset byte index at which the region starts
   * @param length byte length of the region
   * @return the number of bytes to keep, never more than {@code length}
   * @throws NullPointerException if {@code slice} is null
   * @throws IllegalArgumentException if {@code maxCharacterCount} is negative
   */
  private static int calculateTruncationLength(int maxCharacterCount, Slice slice, int offset, int length)
  {
    requireNonNull(slice, "slice is null");
    if (maxCharacterCount < 0) {
      throw new IllegalArgumentException("Max length must be greater or equal than zero");
    }
    // With at least one byte per code point, a region of this size needs no truncation.
    if (length <= maxCharacterCount) {
      return length;
    }

    int indexEnd = offsetOfCodePoint(slice, offset, maxCharacterCount);
    if (indexEnd < 0) {
      // The requested code point position is not within the slice: no truncation needed.
      return length;
    }
    // offsetOfCodePoint receives only the start offset, so it may locate the code point
    // beyond offset + length when the slice is longer than the region. Clamp to length.
    return Math.min(indexEnd - offset, length);
  }
}

  /**
   * Shortens {@code slice} to at most {@code length} code points.
   *
   * @param slice the UTF-8 data to shorten
   * @param length maximum number of code points to keep; must not be negative
   * @return an empty slice when {@code length} is zero, the input itself when
   * the code point position {@code length} is not within it, otherwise the
   * leading part of the input ending just before that code point
   * @throws PrestoException if {@code length} is negative
   */
  public static Slice truncate(Slice slice, int length)
  {
    if (length < 0) {
      throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Length smaller then zero");
    }
    if (length == 0) {
      return Slices.EMPTY_SLICE;
    }

    // byte index of the first code point that must be dropped, or negative if there is none
    int cutoff = offsetOfCodePoint(slice, length);
    return cutoff < 0 ? slice : slice.slice(0, cutoff);
  }
}

/**
 * Extracts {@code codePointLength} code points from {@code utf8}, beginning
 * at code point position {@code codePointStart}.
 * <p>
 * Note: the input is not explicitly validated as UTF-8; invalid UTF-8 may
 * produce incorrect results or an exception.
 *
 * @param utf8 the UTF-8 encoded slice
 * @param codePointStart zero-based position of the first code point; must not be negative
 * @param codePointLength number of code points to extract; must not be negative
 * @return the requested part of the slice, or an empty slice when
 * {@code codePointLength} is zero
 * @throws IllegalArgumentException if the slice does not contain the requested code points
 * @throws InvalidUtf8Exception if the last code point claims to extend past the end of the slice
 */
public static Slice substring(Slice utf8, int codePointStart, int codePointLength)
{
  checkArgument(codePointStart >= 0, "codePointStart is negative");
  checkArgument(codePointLength >= 0, "codePointLength is negative");

  int firstByte = offsetOfCodePoint(utf8, codePointStart);
  if (firstByte < 0) {
    throw new IllegalArgumentException("UTF-8 does not contain " + codePointStart + " code points");
  }
  if (codePointLength == 0) {
    return Slices.EMPTY_SLICE;
  }

  // find where the last requested code point starts, then step over it
  int lastCodePointStart = offsetOfCodePoint(utf8, firstByte, codePointLength - 1);
  if (lastCodePointStart < 0) {
    throw new IllegalArgumentException("UTF-8 does not contain " + (codePointStart + codePointLength) + " code points");
  }

  int endExclusive = lastCodePointStart + lengthOfCodePoint(utf8, lastCodePointStart);
  if (endExclusive > utf8.length()) {
    throw new InvalidUtf8Exception("UTF-8 is not well formed");
  }
  return utf8.slice(firstByte, endExclusive - firstByte);
}

/**
 * Exercises malformed UTF-8 input. The results are not expected to be
 * "correct", only harmless.
 */
@Test
public void testInvalidUtf8()
{
  assertEquals(countCodePoints(wrappedBuffer(INVALID_UTF8_1)), 0);
  assertEquals(countCodePoints(wrappedBuffer(INVALID_UTF8_2)), 3);

  // expected byte offset for each code point position, indexed by position
  int[] expectedOffsetsFirst = {0, -1};
  for (int position = 0; position < expectedOffsetsFirst.length; position++) {
    assertEquals(offsetOfCodePoint(wrappedBuffer(INVALID_UTF8_1), position), expectedOffsetsFirst[position]);
  }

  int[] expectedOffsetsSecond = {0, 2, 3, -1};
  for (int position = 0; position < expectedOffsetsSecond.length; position++) {
    assertEquals(offsetOfCodePoint(wrappedBuffer(INVALID_UTF8_2), position), expectedOffsetsSecond[position]);
  }
}

/**
 * Checks behavior on invalid UTF-8 encodings. A "correct" answer is not
 * required; the result merely has to be harmless.
 */
@Test
public void testInvalidUtf8()
{
  assertEquals(countCodePoints(wrappedBuffer(INVALID_UTF8_1)), 0);
  assertEquals(countCodePoints(wrappedBuffer(INVALID_UTF8_2)), 3);

  // offsets[i] is the expected result of looking up code point position i
  int[] offsetsForFirst = {0, -1};
  for (int i = 0; i < offsetsForFirst.length; i++) {
    assertEquals(offsetOfCodePoint(wrappedBuffer(INVALID_UTF8_1), i), offsetsForFirst[i]);
  }

  int[] offsetsForSecond = {0, 2, 3, -1};
  for (int i = 0; i < offsetsForSecond.length; i++) {
    assertEquals(offsetOfCodePoint(wrappedBuffer(INVALID_UTF8_2), i), offsetsForSecond[i]);
  }
}

/**
 * Verifies {@code offsetOfCodePoint} for every code point position of
 * {@code string}, both as a lookup from the start of the slice and as a
 * one-step lookup from the previous code point, and finally checks that the
 * position just past the last code point yields {@code -1}.
 */
private static void assertOffsetByCodePoints(String string)
{
  Slice encoded = utf8Slice(string);
  int totalCodePoints = (int) string.codePoints().count();
  // lookups from the start are only checked up to this position to avoid n^2 cost on large strings
  int absoluteCheckLimit = 10000;

  int previousByteOffset = 0;
  int charCursor = 0;
  int position = 0;
  while (position < totalCodePoints) {
    boolean hasPrevious = position > 0;

    // derive the expected byte offset by stepping forward from the previous code point
    int expectedByteOffset = 0;
    if (hasPrevious) {
      expectedByteOffset = previousByteOffset + lengthOfCodePoint(string.codePointAt(charCursor));
      charCursor = string.offsetByCodePoints(charCursor, 1);
    }

    if (position < absoluteCheckLimit) {
      assertEquals(offsetOfCodePoint(encoded, position), expectedByteOffset);
    }
    if (hasPrevious) {
      assertEquals(offsetOfCodePoint(encoded, previousByteOffset, 1), expectedByteOffset);
    }

    previousByteOffset = expectedByteOffset;
    position++;
  }
  assertEquals(offsetOfCodePoint(utf8Slice(string), totalCodePoints), -1);
}

/**
 * Walks {@code string} one code point at a time and asserts that
 * {@code offsetOfCodePoint} reports the expected byte offset, both when
 * searching from the start of the slice and when advancing a single code
 * point from the previous offset. The position equal to the code point
 * count must yield {@code -1}.
 */
private static void assertOffsetByCodePoints(String string)
{
  Slice slice = utf8Slice(string);
  int count = (int) string.codePoints().count();

  int priorOffset = 0;
  int charIndex = 0;
  for (int n = 0; n < count; n++) {
    boolean first = n == 0;

    // expected offset = previous offset + UTF-8 length of the previous code point
    int expected = 0;
    if (!first) {
      expected = priorOffset + lengthOfCodePoint(string.codePointAt(charIndex));
      charIndex = string.offsetByCodePoints(charIndex, 1);
    }

    // skip the from-the-start lookup for high positions to avoid n^2 performance on large strings
    if (n < 10000) {
      assertEquals(offsetOfCodePoint(slice, n), expected);
    }
    if (!first) {
      assertEquals(offsetOfCodePoint(slice, priorOffset, 1), expected);
    }
    priorOffset = expected;
  }
  assertEquals(offsetOfCodePoint(utf8Slice(string), count), -1);
}

/**
 * Benchmarks {@code offsetOfCodePoint} by looking up position
 * {@code data.getLength() - 1} and checking that the code point found there
 * ends exactly at the end of the slice.
 * NOTE(review): presumably {@code getLength()} is the slice's code point count; confirm against BenchmarkData.
 *
 * @return the byte offset that was found
 * @throws AssertionError if the located code point does not end at the end of the slice
 */
@Benchmark
public int benchmarkOffsetByCodePoints(BenchmarkData data)
{
  Slice input = data.getSlice();
  int lastOffset = offsetOfCodePoint(input, data.getLength() - 1);
  int endOfLast = lastOffset + lengthOfCodePoint(input, lastOffset);
  if (endOfLast == input.length()) {
    return lastOffset;
  }
  throw new AssertionError();
}

/**
 * Measures {@code offsetOfCodePoint} for position {@code data.getLength() - 1}
 * and sanity-checks the result: the code point at the returned offset must
 * end exactly where the slice ends.
 * NOTE(review): {@code getLength()} looks like the number of code points in the slice; verify in BenchmarkData.
 *
 * @return the byte offset that was found
 * @throws AssertionError if the sanity check fails
 */
@Benchmark
public int benchmarkOffsetByCodePoints(BenchmarkData data)
{
  Slice subject = data.getSlice();
  int found = offsetOfCodePoint(subject, data.getLength() - 1);
  boolean endsAtSliceEnd = found + lengthOfCodePoint(subject, found) == subject.length();
  if (!endsAtSliceEnd) {
    throw new AssertionError();
  }
  return found;
}

/**
 * Checks that an empty slice has no code point at position 0, then runs the
 * per-code-point offset assertions over each sample string.
 */
@Test
public void testOffsetByCodePoints()
{
  assertEquals(offsetOfCodePoint(EMPTY_SLICE, 0), -1);

  String[] samples = {
      STRING_HELLO,
      STRING_QUADRATICALLY,
      STRING_OESTERREICH,
      STRING_DULIOE_DULIOE,
      STRING_FAITH_HOPE_LOVE,
      STRING_NAIVE,
      STRING_OO,
      STRING_ASCII_CODE_POINTS,
      STRING_ALL_CODE_POINTS,
      STRING_ALL_CODE_POINTS_RANDOM,
  };
  for (String sample : samples) {
    assertOffsetByCodePoints(sample);
  }
}

/**
 * Verifies the empty-slice case ({@code -1} for position 0) and then applies
 * {@code assertOffsetByCodePoints} to every sample string in turn.
 */
@Test
public void testOffsetByCodePoints()
{
  assertEquals(offsetOfCodePoint(EMPTY_SLICE, 0), -1);

  String[] inputs = new String[] {
      STRING_HELLO,
      STRING_QUADRATICALLY,
      STRING_OESTERREICH,
      STRING_DULIOE_DULIOE,
      STRING_FAITH_HOPE_LOVE,
      STRING_NAIVE,
      STRING_OO,
      STRING_ASCII_CODE_POINTS,
      STRING_ALL_CODE_POINTS,
      STRING_ALL_CODE_POINTS_RANDOM,
  };
  for (int i = 0; i < inputs.length; i++) {
    assertOffsetByCodePoints(inputs[i]);
  }
}

Javadoc

Finds the index of the first byte of the code point at a position, or -1 if the position is not within the slice.

Note: This method does not explicitly check for valid UTF-8, and may return incorrect results or throw an exception for invalid UTF-8.

Popular methods of SliceUtf8

countCodePoints
Counts the code points within UTF-8 encoded slice up to length. Note: This method does not explicitl
lengthOfCodePoint
Gets the UTF-8 sequence length of the code point at position. Note: This method does not explicitly
codePointToUtf8
Convert the code point to UTF-8.
fixInvalidUtf8
getCodePointAt
Gets the UTF-8 encoded code point at the position. Note: This method does not explicitly check for v
lengthOfCodePointSafe
Gets the UTF-8 sequence length of the code point at position. Truncated UTF-8 sequences, 5 and 6 byt
tryGetCodePointAt
Tries to get the UTF-8 encoded code point at the position. A positive return value means the UTF-8 s
leftTrim
Removes all whiteSpaceCodePoints from the left side of the string. Note: Invalid UTF-8 sequences are
reverse
Reverses the slice code point by code point. Note: Invalid UTF-8 sequences are copied directly to th
rightTrim
Removes all whiteSpaceCodePoints from the right side of the string. Note: Invalid UTF-8 sequen
setCodePointAt
Sets the UTF-8 sequence for code point at the position.
substring
Gets the substring starting at codePointStart and extending for codePointLength code points. Note: T

Popular in Java

Reactive rest calls using spring rest template
setRequestProperty (URLConnection)
compareTo (BigDecimal)
getSupportFragmentManager (FragmentActivity)
EOFException (java.io)
Thrown when a program encounters the end of a file or stream during an input operation.
InetAddress (java.net)
An Internet Protocol (IP) address. This can be either an IPv4 address or an IPv6 address, and in pra
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
LoggerFactory (org.slf4j)
The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for lo
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
Table (org.hibernate.mapping)
A relational table
Top plugins for Android Studio

How to use the offsetOfCodePoint method in io.airlift.slice.SliceUtf8

Best Java code snippets using io.airlift.slice.SliceUtf8.offsetOfCodePoint (Showing top 20 results out of 315)

How to use the offsetOfCodePoint method in io.airlift.slice.SliceUtf8