/**
 * Convenience overload of {@link #UTF8toUTF16(byte[], int, int, char[])} that reads the
 * source array, start offset and length from a {@link BytesRef}.
 *
 * @param bytesRef supplies {@code bytes}, {@code offset} and {@code length} for the delegate call
 * @param chars destination array, passed through to the delegate unchanged
 * @return whatever the delegate returns
 * @see #UTF8toUTF16(byte[], int, int, char[])
 */
public static int UTF8toUTF16(BytesRef bytesRef, char[] chars) {
  final byte[] utf8 = bytesRef.bytes;
  final int start = bytesRef.offset;
  final int count = bytesRef.length;
  return UTF8toUTF16(utf8, start, count, chars);
}
/**
 * Interprets the stored bytes as UTF-8 and returns the resulting string.
 *
 * @return a new String built from the chars produced by
 *         {@code UnicodeUtil.UTF8toUTF16(bytes, offset, length, ...)}
 */
public String utf8ToString() {
  // Scratch array with one char slot per stored byte; only the leading
  // charCount entries end up in the returned String.
  final char[] decoded = new char[length];
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, decoded);
  return String.valueOf(decoded, 0, charCount);
}
/**
 * Copies the provided bytes, interpreted as UTF-8, into {@code ref}.
 *
 * @param bytes array holding the UTF-8 input
 * @param offset index of the first input byte
 * @param length number of input bytes
 */
public void copyUTF8Bytes(byte[] bytes, int offset, int length) {
  // grow(length) runs before ref.chars is read -- presumably to make the
  // destination large enough; confirm against grow's definition.
  grow(length);
  final char[] target = ref.chars;
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, target);
  ref.length = charCount;
}
/**
 * Builds a minimal, deterministic automaton from a sorted collection of {@link BytesRef}
 * terms holding UTF-8 strings. The terms must be binary-sorted.
 *
 * @param input the terms, visited in the collection's iteration order
 * @return the automaton produced by {@code Automaton.Builder.finish()}
 */
public static Automaton build(Collection<BytesRef> input) {
  final DaciukMihovAutomatonBuilder dmBuilder = new DaciukMihovAutomatonBuilder();

  // A single char buffer and a single CharsRef are reused for every term.
  char[] buffer = new char[0];
  final CharsRef scratch = new CharsRef();
  for (BytesRef term : input) {
    buffer = ArrayUtil.grow(buffer, term.length);
    final int charCount = UnicodeUtil.UTF8toUTF16(term, buffer);
    scratch.chars = buffer;
    scratch.length = charCount;
    dmBuilder.add(scratch);
  }

  final Automaton.Builder automatonBuilder = new Automaton.Builder();
  convert(automatonBuilder, dmBuilder.complete(), new IdentityHashMap<State,Integer>());
  return automatonBuilder.finish();
}
/**
 * Convenience overload of {@link #UTF8toUTF16(byte[], int, int, char[])} that reads the
 * source array, start offset and length from a {@link BytesRef}.
 *
 * @param bytesRef supplies {@code bytes}, {@code offset} and {@code length} for the delegate call
 * @param chars destination array, passed through to the delegate unchanged
 * @return whatever the delegate returns
 * @see #UTF8toUTF16(byte[], int, int, char[])
 */
public static int UTF8toUTF16(BytesRef bytesRef, char[] chars) {
  final byte[] utf8 = bytesRef.bytes;
  final int start = bytesRef.offset;
  final int count = bytesRef.length;
  return UTF8toUTF16(utf8, start, count, chars);
}
/**
 * Convenience overload of {@link #UTF8toUTF16(byte[], int, int, CharsRef)} that reads the
 * source array, start offset and length from a {@link BytesRef}.
 *
 * @param bytesRef supplies {@code bytes}, {@code offset} and {@code length} for the delegate call
 * @param chars destination, passed through to the delegate unchanged
 * @see #UTF8toUTF16(byte[], int, int, CharsRef)
 */
public static void UTF8toUTF16(BytesRef bytesRef, CharsRef chars) {
  final byte[] utf8 = bytesRef.bytes;
  final int start = bytesRef.offset;
  final int count = bytesRef.length;
  UTF8toUTF16(utf8, start, count, chars);
}
/**
 * Convenience overload of {@link #UTF8toUTF16(byte[], int, int, char[])} that reads the
 * source array, start offset and length from a {@link BytesRef}.
 *
 * @param bytesRef supplies {@code bytes}, {@code offset} and {@code length} for the delegate call
 * @param chars destination array, passed through to the delegate unchanged
 * @return whatever the delegate returns
 * @see #UTF8toUTF16(byte[], int, int, char[])
 */
public static int UTF8toUTF16(BytesRef bytesRef, char[] chars) {
  final byte[] utf8 = bytesRef.bytes;
  final int start = bytesRef.offset;
  final int count = bytesRef.length;
  return UTF8toUTF16(utf8, start, count, chars);
}
/**
 * Convenience overload of {@link #UTF8toUTF16(byte[], int, int, char[])} that reads the
 * source array, start offset and length from a {@link BytesRef}.
 *
 * @param bytesRef supplies {@code bytes}, {@code offset} and {@code length} for the delegate call
 * @param chars destination array, passed through to the delegate unchanged
 * @return whatever the delegate returns
 * @see #UTF8toUTF16(byte[], int, int, char[])
 */
public static int UTF8toUTF16(BytesRef bytesRef, char[] chars) {
  final byte[] utf8 = bytesRef.bytes;
  final int start = bytesRef.offset;
  final int count = bytesRef.length;
  return UTF8toUTF16(utf8, start, count, chars);
}
@Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (fstReader == null) { // No overrides return true; } if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms final BytesRef stem = stemmerOverrideMap.get(termAtt.buffer(), termAtt.length(), scratchArc, fstReader); if (stem != null) { spare = ArrayUtil.grow(termAtt.buffer(), stem.length); final int length = UnicodeUtil.UTF8toUTF16(stem, spare); if (spare != termAtt.buffer()) { termAtt.copyBuffer(spare, 0, length); } else { termAtt.setLength(length); } keywordAtt.setKeyword(true); } } return true; } else { return false; } }
/**
 * Interprets the stored bytes as UTF-8 and returns the resulting string.
 *
 * @return a new String built from the chars produced by
 *         {@code UnicodeUtil.UTF8toUTF16(bytes, offset, length, ...)}
 */
public String utf8ToString() {
  // Scratch array with one char slot per stored byte; only the leading
  // charCount entries end up in the returned String.
  final char[] decoded = new char[length];
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, decoded);
  return String.valueOf(decoded, 0, charCount);
}
/**
 * Interprets the stored bytes as UTF-8 and returns the resulting string.
 *
 * @return a new String built from the chars produced by
 *         {@code UnicodeUtil.UTF8toUTF16(bytes, offset, length, ...)}
 */
public String utf8ToString() {
  // Scratch array with one char slot per stored byte; only the leading
  // charCount entries end up in the returned String.
  final char[] decoded = new char[length];
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, decoded);
  return String.valueOf(decoded, 0, charCount);
}
/**
 * Interprets the stored bytes as UTF-8 and returns the resulting string.
 *
 * @return a new String built from the chars produced by
 *         {@code UnicodeUtil.UTF8toUTF16(bytes, offset, length, ...)}
 */
public String utf8ToString() {
  // Scratch array with one char slot per stored byte; only the leading
  // charCount entries end up in the returned String.
  final char[] decoded = new char[length];
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, decoded);
  return String.valueOf(decoded, 0, charCount);
}
/**
 * Copies the provided bytes, interpreted as UTF-8, into {@code ref}.
 *
 * @param bytes array holding the UTF-8 input
 * @param offset index of the first input byte
 * @param length number of input bytes
 */
public void copyUTF8Bytes(byte[] bytes, int offset, int length) {
  // grow(length) runs before ref.chars is read -- presumably to make the
  // destination large enough; confirm against grow's definition.
  grow(length);
  final char[] target = ref.chars;
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, target);
  ref.length = charCount;
}
/**
 * Copies the provided bytes, interpreted as UTF-8, into {@code ref}.
 *
 * @param bytes array holding the UTF-8 input
 * @param offset index of the first input byte
 * @param length number of input bytes
 */
public void copyUTF8Bytes(byte[] bytes, int offset, int length) {
  // grow(length) runs before ref.chars is read -- presumably to make the
  // destination large enough; confirm against grow's definition.
  grow(length);
  final char[] target = ref.chars;
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, target);
  ref.length = charCount;
}
/**
 * Decompresses a byte array previously returned by compressString back into a String.
 *
 * @param value array holding the compressed data
 * @param offset index of the first compressed byte
 * @param length number of compressed bytes
 * @return the String obtained by decoding the decompressed bytes as UTF-8
 * @throws DataFormatException declared by this method; the only call here that could
 *         raise it is {@code decompress}
 */
public static String decompressString(byte[] value, int offset, int length) throws DataFormatException {
  final byte[] utf8 = decompress(value, offset, length);
  final int byteCount = utf8.length;
  // One char slot per decompressed byte; only the leading charCount entries are used.
  final char[] decoded = new char[byteCount];
  final int charCount = UnicodeUtil.UTF8toUTF16(utf8, 0, byteCount, decoded);
  return String.valueOf(decoded, 0, charCount);
}
/**
 * Decompresses a byte array previously returned by compressString back into a String.
 *
 * @param value array holding the compressed data
 * @param offset index of the first compressed byte
 * @param length number of compressed bytes
 * @return the String obtained by decoding the decompressed bytes as UTF-8
 * @throws DataFormatException declared by this method; the only call here that could
 *         raise it is {@code decompress}
 */
public static String decompressString(byte[] value, int offset, int length) throws DataFormatException {
  final byte[] utf8 = decompress(value, offset, length);
  final int byteCount = utf8.length;
  // One char slot per decompressed byte; only the leading charCount entries are used.
  final char[] decoded = new char[byteCount];
  final int charCount = UnicodeUtil.UTF8toUTF16(utf8, 0, byteCount, decoded);
  return String.valueOf(decoded, 0, charCount);
}
/**
 * Copies the provided bytes, interpreted as UTF-8, into {@code ref}.
 *
 * @param bytes array holding the UTF-8 input
 * @param offset index of the first input byte
 * @param length number of input bytes
 */
public void copyUTF8Bytes(byte[] bytes, int offset, int length) {
  // grow(length) runs before ref.chars is read -- presumably to make the
  // destination large enough; confirm against grow's definition.
  grow(length);
  final char[] target = ref.chars;
  final int charCount = UnicodeUtil.UTF8toUTF16(bytes, offset, length, target);
  ref.length = charCount;
}
/**
 * Decompresses a byte array previously returned by compressString back into a String.
 *
 * @param value array holding the compressed data
 * @param offset index of the first compressed byte
 * @param length number of compressed bytes
 * @return a String built from the first {@code length} chars of the {@link CharsRef}
 *         filled by {@code UnicodeUtil.UTF8toUTF16}
 * @throws DataFormatException declared by this method; the only call here that could
 *         raise it is {@code decompress}
 */
public static String decompressString(byte[] value, int offset, int length) throws DataFormatException {
  final byte[] utf8 = decompress(value, offset, length);
  final int byteCount = utf8.length;
  // The CharsRef is constructed with the decompressed byte count as its argument.
  final CharsRef decoded = new CharsRef(byteCount);
  UnicodeUtil.UTF8toUTF16(utf8, 0, byteCount, decoded);
  return String.valueOf(decoded.chars, 0, decoded.length);
}
/**
 * Looks up completions for {@code key} and converts each one into a {@link LookupResult}.
 *
 * @param key the text to look up
 * @param higherWeightsFirst selects {@code higherWeightsCompletion} when {@code true},
 *        {@code normalCompletion} otherwise
 * @param num passed to the selected completion's {@code lookup} call
 * @return one result per completion, in the order the completions were returned
 */
@Override
public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
  final List<Completion> completions = higherWeightsFirst
      ? higherWeightsCompletion.lookup(key, num)
      : normalCompletion.lookup(key, num);

  final ArrayList<LookupResult> results = new ArrayList<LookupResult>(completions.size());
  // One CharsRef is reused for decoding; toString() is taken from it for each result.
  final CharsRef scratch = new CharsRef();
  for (Completion completion : completions) {
    scratch.grow(completion.utf8.length);
    UnicodeUtil.UTF8toUTF16(completion.utf8, scratch);
    final String text = scratch.toString();
    results.add(new LookupResult(text, completion.bucket));
  }
  return results;
}
/**
 * Builds a minimal, deterministic automaton from a sorted collection of {@link BytesRef}
 * terms holding UTF-8 strings. The terms must be binary-sorted.
 *
 * @param input the terms, visited in the collection's iteration order
 * @return the automaton produced by {@code Automaton.Builder.finish()}
 */
public static Automaton build(Collection<BytesRef> input) {
  final DaciukMihovAutomatonBuilder dmBuilder = new DaciukMihovAutomatonBuilder();

  // A single char buffer and a single CharsRef are reused for every term.
  char[] buffer = new char[0];
  final CharsRef scratch = new CharsRef();
  for (BytesRef term : input) {
    buffer = ArrayUtil.grow(buffer, term.length);
    final int charCount = UnicodeUtil.UTF8toUTF16(term, buffer);
    scratch.chars = buffer;
    scratch.length = charCount;
    dmBuilder.add(scratch);
  }

  final Automaton.Builder automatonBuilder = new Automaton.Builder();
  convert(automatonBuilder, dmBuilder.complete(), new IdentityHashMap<State,Integer>());
  return automatonBuilder.finish();
}