org.apache.lucene.util.UnicodeUtil.maxUTF8Length java code examples

/**
 * Initialize the byte[] from the UTF8 bytes
 * for the provided String.  
 * 
 * @param text This must be well-formed
 * unicode text, with no unpaired surrogates.
 */
public BytesRef(CharSequence text) {
 this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
 length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes);
}

/**
 * Overwrites this buffer's content with the UTF-8 encoding of a slice of
 * the given character sequence.
 *
 * @param text source characters
 * @param off index of the first char to encode
 * @param len number of chars to encode
 */
public void copyChars(CharSequence text, int off, int len) {
 // Reserve the worst-case number of bytes before encoding.
 final int worstCaseBytes = UnicodeUtil.maxUTF8Length(len);
 grow(worstCaseBytes);
 final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
 ref.length = encodedLength;
}

/**
 * Overwrites this buffer's content with the UTF-8 encoding of a slice of
 * the given char array.
 *
 * @param text source characters
 * @param off index of the first char to encode
 * @param len number of chars to encode
 */
public void copyChars(char[] text, int off, int len) {
 // Reserve the worst-case number of bytes before encoding.
 final int worstCaseBytes = UnicodeUtil.maxUTF8Length(len);
 grow(worstCaseBytes);
 final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
 ref.length = encodedLength;
}

/**
 * Writes the given string as a VInt byte count followed by its UTF-8 bytes.
 *
 * <p>When the worst-case encoded size does not exceed
 * {@code MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING}, the string is encoded once into the
 * reusable {@code scratchBytes} buffer and then written. Otherwise the exact byte count is
 * computed first and the string is encoded directly into {@code bytes}, so no large
 * intermediate buffer is allocated.
 *
 * @param string the string to write
 * @throws IOException declared by the overridden method
 */
@Override
public void writeString(String string) throws IOException {
 final int charCount = string.length();
 final int maxLen = UnicodeUtil.maxUTF8Length(charCount);
 if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) {
  // string is small enough that we don't need to save memory by falling back to double-pass approach
  // this is just an optimized writeString() that re-uses scratchBytes.
  if (scratchBytes == null) {
   // scratchBytes is a byte[], so the per-element size handed to oversize() is Byte.BYTES
   // (not Character.BYTES); this matches the ArrayUtil.grow(byte[], int) call below.
   scratchBytes = new byte[ArrayUtil.oversize(maxLen, Byte.BYTES)];
  } else {
   scratchBytes = ArrayUtil.grow(scratchBytes, maxLen);
  }
  final int len = UnicodeUtil.UTF16toUTF8(string, 0, charCount, scratchBytes);
  writeVInt(len);
  writeBytes(scratchBytes, len);
 } else {
  // use a double pass approach to avoid allocating a large intermediate buffer for string encoding
  final int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, charCount);
  writeVInt(numBytes);
  // writeVInt may have advanced length, so size the buffer only after it has run.
  bytes = ArrayUtil.grow(bytes, length + numBytes);
  length = UnicodeUtil.UTF16toUTF8(string, 0, charCount, bytes, length);
 }
}

/**
 * Encodes the id as a single marker byte followed by the UTF-8 bytes of the string.
 *
 * @param id the id to encode
 * @return a {@code BytesRef} over the freshly allocated buffer, starting at offset 0
 */
private static BytesRef encodeUtf8Id(String id) {
  final int charCount = id.length();
  // One leading slot for the marker plus worst-case room for the encoded id.
  final byte[] buffer = new byte[1 + UnicodeUtil.maxUTF8Length(charCount)];
  // Prepend a byte that indicates that the content is an utf8 string
  buffer[0] = (byte) UTF8;
  // Encoding starts at index 1 so the marker byte is left intact.
  final int encoded = UnicodeUtil.UTF16toUTF8(id, 0, charCount, buffer, 1);
  return new BytesRef(buffer, 0, encoded);
}

/**
 * Initialize the byte[] from the UTF8 bytes
 * for the provided String.  
 * 
 * @param text This must be well-formed
 * unicode text, with no unpaired surrogates.
 */
public BytesRef(CharSequence text) {
 this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
 length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes);
}

/**
 * Produces the encoded form of an id: a marker byte, then the id's UTF-8 bytes.
 *
 * @param id the id to encode
 * @return a {@code BytesRef} over the newly allocated array, starting at offset 0
 */
private static BytesRef encodeUtf8Id(String id) {
  final int idChars = id.length();
  final int capacity = 1 + UnicodeUtil.maxUTF8Length(idChars);
  final byte[] encoded = new byte[capacity];
  // Prepend a byte that indicates that the content is an utf8 string
  encoded[0] = (byte) UTF8;
  // Write the id after the marker byte.
  final int refLength = UnicodeUtil.UTF16toUTF8(id, 0, idChars, encoded, 1);
  return new BytesRef(encoded, 0, refLength);
}

/**
 * Encodes an id into a byte buffer whose first byte is a marker and whose
 * remaining bytes are the UTF-8 encoding of the id.
 *
 * @param id the id to encode
 * @return a {@code BytesRef} wrapping the buffer from offset 0
 */
private static BytesRef encodeUtf8Id(String id) {
  final int numChars = id.length();
  // Reserve the marker slot in addition to the worst-case UTF-8 size.
  final byte[] out = new byte[1 + UnicodeUtil.maxUTF8Length(numChars)];
  // Prepend a byte that indicates that the content is an utf8 string
  out[0] = (byte) UTF8;
  final int result = UnicodeUtil.UTF16toUTF8(id, 0, numChars, out, 1);
  return new BytesRef(out, 0, result);
}

/**
 * Replaces whatever this buffer holds with the UTF-8 bytes of the requested
 * range of the given character sequence.
 *
 * @param text source characters
 * @param off index of the first char to encode
 * @param len number of chars to encode
 */
public void copyChars(CharSequence text, int off, int len) {
 // Make room for the worst case up front, then encode into the backing array.
 final int capacityNeeded = UnicodeUtil.maxUTF8Length(len);
 grow(capacityNeeded);
 final int written = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
 ref.length = written;
}

/**
 * Replaces whatever this buffer holds with the UTF-8 bytes of the requested
 * range of the given char array.
 *
 * @param text source characters
 * @param off index of the first char to encode
 * @param len number of chars to encode
 */
public void copyChars(char[] text, int off, int len) {
 // Make room for the worst case up front, then encode into the backing array.
 final int capacityNeeded = UnicodeUtil.maxUTF8Length(len);
 grow(capacityNeeded);
 final int written = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
 ref.length = written;
}

 /**
  * Serializes a label into the builder obtained from {@code this.bytes}: the UTF-8
  * encoding of each component, with {@code DELIM_CHAR} appended between
  * consecutive components.
  *
  * @param label the label whose components are encoded
  * @return the result of {@code get()} on that builder
  */
 private BytesRef toBytes(FacetLabel label) {
  final BytesRefBuilder scratch = this.bytes.get();
  scratch.clear();
  for (int idx = 0; idx < label.length; idx++) {
   final String component = label.components[idx];
   // A delimiter goes before every component except the first.
   if (idx != 0) {
    scratch.append(DELIM_CHAR);
   }
   final int charCount = component.length();
   // Reserve worst-case room past the current end before encoding in place.
   scratch.grow(scratch.length() + UnicodeUtil.maxUTF8Length(charCount));
   final int newLength =
     UnicodeUtil.UTF16toUTF8(component, 0, charCount, scratch.bytes(), scratch.length());
   scratch.setLength(newLength);
  }
  return scratch.get();
 }
}

/**
 * Writes the given string as a VInt byte count followed by its UTF-8 bytes.
 *
 * <p>When the worst-case encoded size does not exceed
 * {@code MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING}, the string is encoded once into the
 * reusable {@code scratchBytes} buffer and then written. Otherwise the exact byte count is
 * computed first and the string is encoded directly into {@code bytes}, so no large
 * intermediate buffer is allocated.
 *
 * @param string the string to write
 * @throws IOException declared by the overridden method
 */
@Override
public void writeString(String string) throws IOException {
 final int charCount = string.length();
 final int maxLen = UnicodeUtil.maxUTF8Length(charCount);
 if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) {
  // string is small enough that we don't need to save memory by falling back to double-pass approach
  // this is just an optimized writeString() that re-uses scratchBytes.
  if (scratchBytes == null) {
   // scratchBytes is a byte[], so the per-element size handed to oversize() is Byte.BYTES
   // (not Character.BYTES); this matches the ArrayUtil.grow(byte[], int) call below.
   scratchBytes = new byte[ArrayUtil.oversize(maxLen, Byte.BYTES)];
  } else {
   scratchBytes = ArrayUtil.grow(scratchBytes, maxLen);
  }
  final int len = UnicodeUtil.UTF16toUTF8(string, 0, charCount, scratchBytes);
  writeVInt(len);
  writeBytes(scratchBytes, len);
 } else {
  // use a double pass approach to avoid allocating a large intermediate buffer for string encoding
  final int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, charCount);
  writeVInt(numBytes);
  // writeVInt may have advanced length, so size the buffer only after it has run.
  bytes = ArrayUtil.grow(bytes, length + numBytes);
  length = UnicodeUtil.UTF16toUTF8(string, 0, charCount, bytes, length);
 }
}

Javadoc

Returns the maximum number of UTF-8 bytes required to encode a UTF-16 sequence (e.g., Java char[], String)

Popular methods of UnicodeUtil

UTF8toUTF16
Interprets the given byte array as UTF-8 and converts to UTF-16. It is the responsibility of the cal
UTF16toUTF8
Encode characters from a char[] source, starting at offset and stopping when the character 0xffff is
newString
Cover JDK 1.5 API. Create a String from an array of codePoints.
UTF8toUTF32
This method assumes valid UTF8 input. This method does not perform full UTF8 validation, it will chec
calcUTF16toUTF8Length
Calculates the number of UTF8 bytes necessary to write a UTF16 string.
codePointCount
Returns the number of code points in this UTF8 sequence. This method assumes valid UTF8 input. This m
UTF16toUTF8WithHash

Popular in Java

Parsing JSON documents to java classes using gson
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
getResourceAsStream (ClassLoader)
getExternalFilesDir (Context)
IOException (java.io)
Signals a general, I/O-related error. Error details may be specified when calling the constructor, a
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
ArrayList (java.util)
ArrayList is an implementation of List, backed by an array. All optional operations including adding
Scanner (java.util)
A parser that parses a text string of primitive types and strings with the help of regular expressio
ReentrantLock (java.util.concurrent.locks)
A reentrant mutual exclusion Lock with the same basic behavior and semantics as the implicit monitor
JCheckBox (javax.swing)
Top plugins for Android Studio

How to use the maxUTF8Length method in org.apache.lucene.util.UnicodeUtil

Best Java code snippets using org.apache.lucene.util.UnicodeUtil.maxUTF8Length (Showing top 12 results out of 315)

How to use
maxUTF8Length
method
in
org.apache.lucene.util.UnicodeUtil