/**
 * Creates a {@code BytesRef} whose content is the UTF-8 encoding of the given text.
 *
 * <p>The backing array is allocated with {@code UnicodeUtil.maxUTF8Length(text.length())}
 * bytes, and {@code length} is then set to the value returned by
 * {@code UnicodeUtil.UTF16toUTF8}.
 *
 * @param text This must be well-formed unicode text, with no unpaired surrogates.
 */
public BytesRef(CharSequence text) {
  // The delegating constructor call has to come first, so the array is sized inline.
  this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
  this.length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this.bytes);
}
/**
 * Overwrites the content of this buffer with the UTF-8 encoded bytes that represent the
 * requested slice of {@code text}.
 *
 * @param text the character sequence to encode
 * @param off start offset into {@code text}, passed through to {@code UnicodeUtil.UTF16toUTF8}
 * @param len number of chars to encode, also used to size the buffer via
 *     {@code UnicodeUtil.maxUTF8Length}
 */
public void copyChars(CharSequence text, int off, int len) {
  // Make sure the backing array can hold the largest encoding reported for len chars.
  final int requiredCapacity = UnicodeUtil.maxUTF8Length(len);
  grow(requiredCapacity);

  // Encode into the backing array and record the value the encoder returns as the new length.
  final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  ref.length = encodedLength;
}
/**
 * Overwrites the content of this buffer with the UTF-8 encoded bytes that represent the
 * requested slice of the {@code text} array.
 *
 * @param text the char array to encode
 * @param off start offset into {@code text}, passed through to {@code UnicodeUtil.UTF16toUTF8}
 * @param len number of chars to encode, also used to size the buffer via
 *     {@code UnicodeUtil.maxUTF8Length}
 */
public void copyChars(char[] text, int off, int len) {
  // Make sure the backing array can hold the largest encoding reported for len chars.
  final int requiredCapacity = UnicodeUtil.maxUTF8Length(len);
  grow(requiredCapacity);

  // Encode into the backing array and record the value the encoder returns as the new length.
  final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  ref.length = encodedLength;
}
@Override public void writeString(String string) throws IOException { int maxLen = UnicodeUtil.maxUTF8Length(string.length()); if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) { // string is small enough that we don't need to save memory by falling back to double-pass approach // this is just an optimized writeString() that re-uses scratchBytes. if (scratchBytes == null) { scratchBytes = new byte[ArrayUtil.oversize(maxLen, Character.BYTES)]; } else { scratchBytes = ArrayUtil.grow(scratchBytes, maxLen); } int len = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), scratchBytes); writeVInt(len); writeBytes(scratchBytes, len); } else { // use a double pass approach to avoid allocating a large intermediate buffer for string encoding int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, string.length()); writeVInt(numBytes); bytes = ArrayUtil.grow(bytes, length + numBytes); length = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), bytes, length); } }
private static BytesRef encodeUtf8Id(String id) { byte[] b = new byte[1 + UnicodeUtil.maxUTF8Length(id.length())]; // Prepend a byte that indicates that the content is an utf8 string b[0] = (byte) UTF8; int length = UnicodeUtil.UTF16toUTF8(id, 0, id.length(), b, 1); return new BytesRef(b, 0, length); }
/**
 * Creates a {@code BytesRef} whose content is the UTF-8 encoding of the given text.
 *
 * <p>The backing array is allocated with {@code UnicodeUtil.maxUTF8Length(text.length())}
 * bytes, and {@code length} is then set to the value returned by
 * {@code UnicodeUtil.UTF16toUTF8}.
 *
 * @param text This must be well-formed unicode text, with no unpaired surrogates.
 */
public BytesRef(CharSequence text) {
  // The delegating constructor call has to come first, so the array is sized inline.
  this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
  this.length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this.bytes);
}
private static BytesRef encodeUtf8Id(String id) { byte[] b = new byte[1 + UnicodeUtil.maxUTF8Length(id.length())]; // Prepend a byte that indicates that the content is an utf8 string b[0] = (byte) UTF8; int length = UnicodeUtil.UTF16toUTF8(id, 0, id.length(), b, 1); return new BytesRef(b, 0, length); }
private static BytesRef encodeUtf8Id(String id) { byte[] b = new byte[1 + UnicodeUtil.maxUTF8Length(id.length())]; // Prepend a byte that indicates that the content is an utf8 string b[0] = (byte) UTF8; int length = UnicodeUtil.UTF16toUTF8(id, 0, id.length(), b, 1); return new BytesRef(b, 0, length); }
/**
 * Overwrites the content of this buffer with the UTF-8 encoded bytes that represent the
 * requested slice of {@code text}.
 *
 * @param text the character sequence to encode
 * @param off start offset into {@code text}, passed through to {@code UnicodeUtil.UTF16toUTF8}
 * @param len number of chars to encode, also used to size the buffer via
 *     {@code UnicodeUtil.maxUTF8Length}
 */
public void copyChars(CharSequence text, int off, int len) {
  // Make sure the backing array can hold the largest encoding reported for len chars.
  final int requiredCapacity = UnicodeUtil.maxUTF8Length(len);
  grow(requiredCapacity);

  // Encode into the backing array and record the value the encoder returns as the new length.
  final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  ref.length = encodedLength;
}
/**
 * Overwrites the content of this buffer with the UTF-8 encoded bytes that represent the
 * requested slice of the {@code text} array.
 *
 * @param text the char array to encode
 * @param off start offset into {@code text}, passed through to {@code UnicodeUtil.UTF16toUTF8}
 * @param len number of chars to encode, also used to size the buffer via
 *     {@code UnicodeUtil.maxUTF8Length}
 */
public void copyChars(char[] text, int off, int len) {
  // Make sure the backing array can hold the largest encoding reported for len chars.
  final int requiredCapacity = UnicodeUtil.maxUTF8Length(len);
  grow(requiredCapacity);

  // Encode into the backing array and record the value the encoder returns as the new length.
  final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  ref.length = encodedLength;
}
/**
 * Builds the byte form of {@code label}: the UTF-8 encoding of each component, in order, with
 * {@code DELIM_CHAR} appended between consecutive components.
 *
 * <p>The builder obtained from {@code this.bytes.get()} is cleared and reused on every call.
 * NOTE(review): the returned {@code BytesRef} presumably shares that builder's storage -
 * confirm before holding on to it across calls.
 *
 * @param label the label whose first {@code label.length} components are encoded
 * @return the result of {@code get()} on the reused builder
 */
private BytesRef toBytes(FacetLabel label) {
  final BytesRefBuilder scratch = this.bytes.get();
  scratch.clear();

  for (int idx = 0; idx < label.length; idx++) {
    final String component = label.components[idx];

    // Every component except the first is preceded by the delimiter.
    if (idx != 0) {
      scratch.append(DELIM_CHAR);
    }

    // Reserve space for the largest encoding reported for this component, past the current end.
    final int charCount = component.length();
    scratch.grow(scratch.length() + UnicodeUtil.maxUTF8Length(charCount));

    // Encode at the current end of the content; the encoder's return value is the new length.
    final int newLength =
        UnicodeUtil.UTF16toUTF8(component, 0, charCount, scratch.bytes(), scratch.length());
    scratch.setLength(newLength);
  }

  return scratch.get();
}
}
@Override public void writeString(String string) throws IOException { int maxLen = UnicodeUtil.maxUTF8Length(string.length()); if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) { // string is small enough that we don't need to save memory by falling back to double-pass approach // this is just an optimized writeString() that re-uses scratchBytes. if (scratchBytes == null) { scratchBytes = new byte[ArrayUtil.oversize(maxLen, Character.BYTES)]; } else { scratchBytes = ArrayUtil.grow(scratchBytes, maxLen); } int len = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), scratchBytes); writeVInt(len); writeBytes(scratchBytes, len); } else { // use a double pass approach to avoid allocating a large intermediate buffer for string encoding int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, string.length()); writeVInt(numBytes); bytes = ArrayUtil.grow(bytes, length + numBytes); length = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), bytes, length); } }