/**
 * Returns the UTF-8 sequence length of the code point located at
 * {@code position}, derived from the single byte read at that position.
 * <p>
 * Note: no explicit UTF-8 validation is performed here; for invalid UTF-8
 * the result may be incorrect or an exception may be thrown.
 *
 * @param utf8 the slice to read the byte from
 * @param position the index handed to {@code utf8.getByte}
 * @return the value computed by {@code lengthOfCodePointFromStartByte}
 *         for the byte at {@code position}
 */
public static int lengthOfCodePoint(Slice utf8, int position) {
    byte startByte = utf8.getByte(position);
    return lengthOfCodePointFromStartByte(startByte);
}
/**
 * Returns the UTF-8 sequence length of the code point located at
 * {@code position}, derived from the single byte read at that position.
 * <p>
 * Note: no explicit UTF-8 validation is performed here; for invalid UTF-8
 * the result may be incorrect or an exception may be thrown.
 *
 * @param utf8 the slice to read the byte from
 * @param position the index handed to {@code utf8.getByte}
 * @return the value computed by {@code lengthOfCodePointFromStartByte}
 *         for the byte at {@code position}
 */
public static int lengthOfCodePoint(Slice utf8, int position) {
    byte startByte = utf8.getByte(position);
    return lengthOfCodePointFromStartByte(startByte);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code INVALID_FF_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xFF of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xFF of code point")
public void testLengthOfCodePointFFByte() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(INVALID_FF_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code START_6_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xFD of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xFD of code point")
public void testLengthOfCodePoint6ByteByte() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(START_6_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code INVALID_FF_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xFF of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xFF of code point")
public void testLengthOfCodePointFFByte() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(INVALID_FF_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code CONTINUATION_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xBF of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xBF of code point")
public void testLengthOfCodePointContinuationByte() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(CONTINUATION_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code START_5_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xFB of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xFB of code point")
public void testLengthOfCodePoint5ByteSequence() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(START_5_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code START_6_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xFD of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xFD of code point")
public void testLengthOfCodePoint6ByteByte() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(START_6_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code START_5_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xFB of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xFB of code point")
public void testLengthOfCodePoint5ByteSequence() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(START_5_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code INVALID_FE_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xFE of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xFE of code point")
public void testLengthOfCodePointFEByte() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(INVALID_FE_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code CONTINUATION_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xBF of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xBF of code point")
public void testLengthOfCodePointContinuationByte() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(CONTINUATION_BYTE);
}
/**
 * Expects {@code lengthOfCodePointFromStartByte} to reject
 * {@code INVALID_FE_BYTE} with an {@code InvalidUtf8Exception} whose
 * message matches "Illegal start 0xFE of code point".
 */
@Test(
        expectedExceptions = InvalidUtf8Exception.class,
        expectedExceptionsMessageRegExp = "Illegal start 0xFE of code point")
public void testLengthOfCodePointFEByte() {
    // The call itself is the assertion: it must throw.
    lengthOfCodePointFromStartByte(INVALID_FE_BYTE);
}
/**
 * Exercises the code point length helpers in three stages:
 * <ol>
 * <li>the four {@code START_n_BYTE} constants must map to lengths 1 to 4;</li>
 * <li>for every entry of {@code ALL_CODE_POINTS}, the length, decode and
 * encode helpers must agree with the bytes produced by
 * {@code String.getBytes(UTF_8)};</li>
 * <li>for every entry of {@code INVALID_SEQUENCES},
 * {@code lengthOfCodePointSafe} must report the full sequence length,
 * whether the sequence stands alone, follows an {@code 'x'} byte, or is
 * followed by one.</li>
 * </ol>
 */
@Test
public void testLengthOfCodePoint() {
    // Stage 1: start-byte constants.
    assertEquals(lengthOfCodePointFromStartByte(START_1_BYTE), 1);
    assertEquals(lengthOfCodePointFromStartByte(START_2_BYTE), 2);
    assertEquals(lengthOfCodePointFromStartByte(START_3_BYTE), 3);
    assertEquals(lengthOfCodePointFromStartByte(START_4_BYTE), 4);

    // Stage 2: round-trip each code point through the JDK encoder.
    for (int codePoint : ALL_CODE_POINTS) {
        int[] singleCodePoint = {codePoint};
        String text = new String(singleCodePoint, 0, 1);
        assertEquals(text.codePoints().count(), 1);

        Slice utf8 = wrappedBuffer(text.getBytes(UTF_8));
        int encodedLength = utf8.length();

        assertEquals(lengthOfCodePoint(codePoint), encodedLength);
        assertEquals(lengthOfCodePoint(utf8, 0), encodedLength);
        assertEquals(lengthOfCodePointSafe(utf8, 0), encodedLength);
        assertEquals(lengthOfCodePointFromStartByte(utf8.getByte(0)), encodedLength);

        assertEquals(getCodePointAt(utf8, 0), codePoint);
        assertEquals(getCodePointBefore(utf8, encodedLength), codePoint);

        assertEquals(codePointToUtf8(codePoint), utf8);
    }

    // Stage 3: invalid sequences, alone and with a neighbouring 'x' byte.
    for (byte[] sequence : INVALID_SEQUENCES) {
        int expectedLength = sequence.length;

        Slice alone = wrappedBuffer(sequence);
        assertEquals(lengthOfCodePointSafe(alone, 0), expectedLength);

        Slice withLeadingX = wrappedBuffer(concat(new byte[] {'x'}, sequence));
        assertEquals(lengthOfCodePointSafe(withLeadingX, 1), expectedLength);

        Slice withTrailingX = wrappedBuffer(concat(sequence, new byte[] {'x'}));
        assertEquals(lengthOfCodePointSafe(withTrailingX, 0), expectedLength);
    }
}
/**
 * Exercises the code point length helpers in three stages:
 * <ol>
 * <li>the four {@code START_n_BYTE} constants must map to lengths 1 to 4;</li>
 * <li>for every entry of {@code ALL_CODE_POINTS}, the length, decode and
 * encode helpers must agree with the bytes produced by
 * {@code String.getBytes(UTF_8)};</li>
 * <li>for every entry of {@code INVALID_SEQUENCES},
 * {@code lengthOfCodePointSafe} must report the full sequence length,
 * whether the sequence stands alone, follows an {@code 'x'} byte, or is
 * followed by one.</li>
 * </ol>
 */
@Test
public void testLengthOfCodePoint() {
    // Stage 1: start-byte constants.
    assertEquals(lengthOfCodePointFromStartByte(START_1_BYTE), 1);
    assertEquals(lengthOfCodePointFromStartByte(START_2_BYTE), 2);
    assertEquals(lengthOfCodePointFromStartByte(START_3_BYTE), 3);
    assertEquals(lengthOfCodePointFromStartByte(START_4_BYTE), 4);

    // Stage 2: round-trip each code point through the JDK encoder.
    for (int codePoint : ALL_CODE_POINTS) {
        int[] singleCodePoint = {codePoint};
        String text = new String(singleCodePoint, 0, 1);
        assertEquals(text.codePoints().count(), 1);

        Slice utf8 = wrappedBuffer(text.getBytes(UTF_8));
        int encodedLength = utf8.length();

        assertEquals(lengthOfCodePoint(codePoint), encodedLength);
        assertEquals(lengthOfCodePoint(utf8, 0), encodedLength);
        assertEquals(lengthOfCodePointSafe(utf8, 0), encodedLength);
        assertEquals(lengthOfCodePointFromStartByte(utf8.getByte(0)), encodedLength);

        assertEquals(getCodePointAt(utf8, 0), codePoint);
        assertEquals(getCodePointBefore(utf8, encodedLength), codePoint);

        assertEquals(codePointToUtf8(codePoint), utf8);
    }

    // Stage 3: invalid sequences, alone and with a neighbouring 'x' byte.
    for (byte[] sequence : INVALID_SEQUENCES) {
        int expectedLength = sequence.length;

        Slice alone = wrappedBuffer(sequence);
        assertEquals(lengthOfCodePointSafe(alone, 0), expectedLength);

        Slice withLeadingX = wrappedBuffer(concat(new byte[] {'x'}, sequence));
        assertEquals(lengthOfCodePointSafe(withLeadingX, 1), expectedLength);

        Slice withTrailingX = wrappedBuffer(concat(sequence, new byte[] {'x'}));
        assertEquals(lengthOfCodePointSafe(withTrailingX, 0), expectedLength);
    }
}
/**
 * Benchmark: steps through the slice from {@code data}, advancing by the
 * length that {@code lengthOfCodePointFromStartByte} reports for the byte
 * at the current offset, and counts the steps taken.
 *
 * @param data supplies the slice to scan and the expected count
 * @return the number of steps counted
 * @throws AssertionError if the count differs from {@code data.getLength()}
 */
@Benchmark
public int benchmarkLengthOfCodePointFromStartByte(BenchmarkData data) {
    Slice slice = data.getSlice();

    int count = 0;
    // One iteration per start byte; the offset jumps over the whole sequence.
    for (int offset = 0; offset < slice.length(); count++) {
        offset += lengthOfCodePointFromStartByte(slice.getByte(offset));
    }

    if (count == data.getLength()) {
        return count;
    }
    throw new AssertionError();
}
/**
 * Benchmark: steps through the slice from {@code data}, advancing by the
 * length that {@code lengthOfCodePointFromStartByte} reports for the byte
 * at the current offset, and counts the steps taken.
 *
 * @param data supplies the slice to scan and the expected count
 * @return the number of steps counted
 * @throws AssertionError if the count differs from {@code data.getLength()}
 */
@Benchmark
public int benchmarkLengthOfCodePointFromStartByte(BenchmarkData data) {
    Slice slice = data.getSlice();

    int count = 0;
    // One iteration per start byte; the offset jumps over the whole sequence.
    for (int offset = 0; offset < slice.length(); count++) {
        offset += lengthOfCodePointFromStartByte(slice.getByte(offset));
    }

    if (count == data.getLength()) {
        return count;
    }
    throw new AssertionError();
}