/** * Returns the normalized form of the source string. * @param src source string * @return normalized src * @stable ICU 4.4 */ public String normalize(CharSequence src) { if(src instanceof String) { // Fastpath: Do not construct a new String if the src is a String // and is already normalized. int spanLength=spanQuickCheckYes(src); if(spanLength==src.length()) { return (String)src; } StringBuilder sb=new StringBuilder(src.length()).append(src, 0, spanLength); return normalizeSecondAndAppend(sb, src.subSequence(spanLength, src.length())).toString(); } return normalize(src, new StringBuilder(src.length())).toString(); }
/**
 * Transforms the source string by delegating to {@code norm2.normalize}.
 *
 * @param source the string to transform
 * @return the result of normalizing {@code source} with {@code norm2}
 */
@Override
public String transform(String source) {
    final String normalized = norm2.normalize(source);
    return normalized;
}
}
@Override public int read(char[] cbuf, int off, int len) throws IOException { // Buffer all input on the first call. if (normalizedInput == null) { StringBuilder sbBuffer = new StringBuilder(); char[] temp = new char[MAX_BUFFER_SIZE]; for (int cnt = input.read(temp); cnt > 0; cnt = input.read(temp)) { sbBuffer.append(replaceSpecialCharacters(temp, cnt), 0, cnt); } normalizedInput = new StringReader(normalizer.normalize(sbBuffer.toString())); } return normalizedInput.read(cbuf, off, len); }
/**
 * Decomposes a string.
 * The string is decomposed according to the specified mode.
 *
 * @param str     The string to decompose.
 * @param compat  If true the string will be decomposed according to NFKD
 *                rules and if false will be decomposed according to NFD
 *                rules.
 * @param options The normalization options, ORed together (0 for no options).
 * @return String The decomposed string
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 */
@Deprecated
public static String decompose(String str, boolean compat, int options) {
    // Pick the decomposing normalizer for the requested mode/options and apply it.
    final String decomposed = getDecomposeNormalizer2(compat, options).normalize(str);
    return decomposed;
}
@Override public int read(char[] cbuf, int off, int len) throws IOException { // Buffer all input on the first call. if (normalizedInput == null) { StringBuilder sbBuffer = new StringBuilder(); char[] temp = new char[MAX_BUFFER_SIZE]; for (int cnt = input.read(temp); cnt > 0; cnt = input.read(temp)) { sbBuffer.append(replaceSpecialCharacters(temp, cnt), 0, cnt); } normalizedInput = new StringReader(normalizer.normalize(sbBuffer.toString())); } return normalizedInput.read(cbuf, off, len); }
/**
 * Composes a string.
 * The string is composed according to the specified mode.
 *
 * @param str     The string to compose.
 * @param compat  If true the string will be composed according to
 *                NFKC rules and if false will be composed according to
 *                NFC rules.
 * @param options The only recognized option is UNICODE_3_2
 * @return String The composed string
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 */
@Deprecated
public static String compose(String str, boolean compat, int options) {
    // Pick the composing normalizer for the requested mode/options and apply it.
    final String composed = getComposeNormalizer2(compat, options).normalize(str);
    return composed;
}
/** * Returns true if one index character string is "better" than the other. * Shorter NFKD is better, and otherwise NFKD-binary-less-than is * better, and otherwise binary-less-than is better. */ private static boolean isOneLabelBetterThanOther(Normalizer2 nfkdNormalizer, String one, String other) { // This is called with primary-equal strings, but never with one.equals(other). String n1 = nfkdNormalizer.normalize(one); String n2 = nfkdNormalizer.normalize(other); int result = n1.codePointCount(0, n1.length()) - n2.codePointCount(0, n2.length()); if (result != 0) { return result < 0; } result = binaryCmp.compare(n1, n2); if (result != 0) { return result < 0; } return binaryCmp.compare(one, other) < 0; }
/**
 * Normalizes a {@code String} using the given normalization operation.
 * <p>
 * The {@code options} parameter specifies which optional
 * {@code Normalizer} features are to be enabled for this operation.
 * Currently the only available option is {@link #UNICODE_3_2}.
 * If you want the default behavior corresponding to one of the standard
 * Unicode Normalization Forms, use 0 for this argument.
 *
 * @param str     the input string to be normalized.
 * @param mode    the normalization mode
 * @param options the optional features to be enabled.
 * @return String the normalized string
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 */
@Deprecated
public static String normalize(String str, Mode mode, int options) {
    // The mode supplies the normalizer matching the requested options.
    final String normalized = mode.getNormalizer2(options).normalize(str);
    return normalized;
}
/**
 * Advances the wrapped {@code input} to its next token and, unless the quick
 * check reports the term as already normalized, replaces the term text with
 * its normalized form.
 *
 * @return false if {@code input} has no further token, true otherwise
 * @throws IOException if advancing the wrapped input fails
 */
@Override
public final boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
        return false;
    }
    final boolean alreadyNormalized = normalizer.quickCheck(termAtt) == Normalizer.YES;
    if (!alreadyNormalized) {
        // Normalize into the reusable buffer, then overwrite the term with it.
        buffer.setLength(0);
        normalizer.normalize(termAtt, buffer);
        termAtt.setEmpty().append(buffer);
    }
    return true;
}
}
private Appendable normalize(CharSequence src, Appendable dest, UnicodeSet.SpanCondition spanCondition) { // Don't throw away destination buffer between iterations. StringBuilder tempDest=new StringBuilder(); try { for(int prevSpanLimit=0; prevSpanLimit<src.length();) { int spanLimit=set.span(src, prevSpanLimit, spanCondition); int spanLength=spanLimit-prevSpanLimit; if(spanCondition==UnicodeSet.SpanCondition.NOT_CONTAINED) { if(spanLength!=0) { dest.append(src, prevSpanLimit, spanLimit); } spanCondition=UnicodeSet.SpanCondition.SIMPLE; } else { if(spanLength!=0) { // Not norm2.normalizeSecondAndAppend() because we do not want // to modify the non-filter part of dest. dest.append(norm2.normalize(src.subSequence(prevSpanLimit, spanLimit), tempDest)); } spanCondition=UnicodeSet.SpanCondition.NOT_CONTAINED; } prevSpanLimit=spanLimit; } } catch(IOException e) { throw new ICUUncheckedIOException(e); } return dest; }
/**
 * Advances the wrapped {@code input} to its next token and, unless the quick
 * check reports the term as already normalized, replaces the term text with
 * its normalized form.
 *
 * @return false if {@code input} has no further token, true otherwise
 * @throws IOException if advancing the wrapped input fails
 */
@Override
public final boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
        return false;
    }
    final boolean alreadyNormalized = normalizer.quickCheck(termAtt) == Normalizer.YES;
    if (!alreadyNormalized) {
        // Normalize into the reusable buffer, then overwrite the term with it.
        buffer.setLength(0);
        normalizer.normalize(termAtt, buffer);
        termAtt.setEmpty().append(buffer);
    }
    return true;
}
}
// Store the result of nfd.normalize(newSource) as the new source and clear the done flag.
// NOTE(review): nfd is presumably an NFD normalizer; its declaration is outside this view.
source = nfd.normalize(newSource); done = false;
/**
 * Fills {@code buffer} with the normalized form of the text segment that
 * ends at {@code currentIndex}, and moves {@code currentIndex} back to the
 * start of that segment.
 * <p>
 * The segment is collected backwards, code point by code point, and stops
 * after the first code point for which {@code norm2.hasBoundaryBefore}
 * is true (that code point is included) or when the text is exhausted.
 *
 * @return true if the normalized segment is non-empty
 */
private boolean previousNormalize() {
    clearBuffer();
    nextIndex = currentIndex;
    text.setIndex(currentIndex);

    StringBuilder segment = new StringBuilder();
    for (int c = text.previousCodePoint(); c >= 0; c = text.previousCodePoint()) {
        // Prepend, because the text is being read in reverse.
        if (c > 0xffff) {
            segment.insert(0, Character.toChars(c));
        } else {
            segment.insert(0, (char) c);
        }
        if (norm2.hasBoundaryBefore(c)) {
            break;
        }
    }

    currentIndex = text.getIndex();
    norm2.normalize(segment, buffer);
    // Position at the end of the buffer.
    int normalizedLength = buffer.length();
    bufferPos = normalizedLength;
    return normalizedLength != 0;
}
/**
 * Composes a range of a char array into a destination char array.
 * The text is composed according to the specified mode.
 *
 * @param src       The char array to compose.
 * @param srcStart  Start index of the source
 * @param srcLimit  Limit index of the source
 * @param dest      The char buffer to fill in
 * @param destStart Start index of the destination buffer
 * @param destLimit End index of the destination buffer
 * @param compat    If true the char array will be composed according to
 *                  NFKC rules and if false will be composed according to
 *                  NFC rules.
 * @param options   The normalization options, ORed together (0 for no options).
 * @return int      The total buffer size needed; if greater than length of
 *                  result, the output was truncated.
 * @exception IndexOutOfBoundsException if target.length is less than the
 *                  required length
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 */
@Deprecated
public static int compose(char[] src, int srcStart, int srcLimit,
                          char[] dest, int destStart, int destLimit,
                          boolean compat, int options) {
    // View the requested source range as a CharSequence without copying.
    final int srcLength = srcLimit - srcStart;
    CharBuffer input = CharBuffer.wrap(src, srcStart, srcLength);
    // Adapter that lets the normalizer write into the caller's array range.
    CharsAppendable output = new CharsAppendable(dest, destStart, destLimit);
    getComposeNormalizer2(compat, options).normalize(input, output);
    return output.length();
}
/**
 * Decomposes a range of a char array into a destination char array.
 * The text is decomposed according to the specified mode.
 *
 * @param src       The char array to decompose.
 * @param srcStart  Start index of the source
 * @param srcLimit  Limit index of the source
 * @param dest      The char buffer to fill in
 * @param destStart Start index of the destination buffer
 * @param destLimit End index of the destination buffer
 * @param compat    If true the char array will be decomposed according to NFKD
 *                  rules and if false will be decomposed according to
 *                  NFD rules.
 * @param options   The normalization options, ORed together (0 for no options).
 * @return int      The total buffer size needed; if greater than length of
 *                  result, the output was truncated.
 * @exception IndexOutOfBoundsException if the target capacity is less than
 *                  the required length
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 */
@Deprecated
public static int decompose(char[] src, int srcStart, int srcLimit,
                            char[] dest, int destStart, int destLimit,
                            boolean compat, int options) {
    // View the requested source range as a CharSequence without copying.
    final int srcLength = srcLimit - srcStart;
    CharBuffer input = CharBuffer.wrap(src, srcStart, srcLength);
    // Adapter that lets the normalizer write into the caller's array range.
    CharsAppendable output = new CharsAppendable(dest, destStart, destLimit);
    getDecomposeNormalizer2(compat, options).normalize(input, output);
    return output.length();
}
/**
 * Normalizes a range of a char array into a destination char array.
 * The text is normalized according to the specified normalization
 * mode and options.
 *
 * @param src       The char array to normalize.
 * @param srcStart  Start index of the source
 * @param srcLimit  Limit index of the source
 * @param dest      The char buffer to fill in
 * @param destStart Start index of the destination buffer
 * @param destLimit End index of the destination buffer
 * @param mode      The normalization mode; one of Normalizer.NONE,
 *                  Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
 *                  Normalizer.NFKD, Normalizer.DEFAULT
 * @param options   The normalization options, ORed together (0 for no options).
 * @return int      The total buffer size needed; if greater than length of
 *                  result, the output was truncated.
 * @exception IndexOutOfBoundsException if the target capacity is
 *                  less than the required length
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 */
@Deprecated
public static int normalize(char[] src, int srcStart, int srcLimit,
                            char[] dest, int destStart, int destLimit,
                            Mode mode, int options) {
    // View the requested source range as a CharSequence without copying.
    final int srcLength = srcLimit - srcStart;
    CharBuffer input = CharBuffer.wrap(src, srcStart, srcLength);
    // Adapter that lets the normalizer write into the caller's array range.
    CharsAppendable output = new CharsAppendable(dest, destStart, destLimit);
    mode.getNormalizer2(options).normalize(input, output);
    return output.length();
}
private boolean nextNormalize() { clearBuffer(); currentIndex=nextIndex; text.setIndex(nextIndex); // Skip at least one character so we make progress. int c=text.nextCodePoint(); if(c<0) { return false; } StringBuilder segment=new StringBuilder().appendCodePoint(c); while((c=text.nextCodePoint())>=0) { if(norm2.hasBoundaryBefore(c)) { text.moveCodePointIndex(-1); break; } segment.appendCodePoint(c); } nextIndex=text.getIndex(); norm2.normalize(segment, buffer); return buffer.length()!=0; }
// Wrap normalizer.normalize(s) in a lambda and return what ensureValidString(cx, ...) produces for it.
return ensureValidString(cx, () -> normalizer.normalize(s));