/**
 * Creates a new Normalizer2CharFilter that combines NFKC normalization, Case
 * Folding, and removal of Default Ignorables (NFKC_Casefold).
 * <p>
 * Delegates to the two-argument constructor with the normalizer obtained from
 * {@code Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)}.
 *
 * @param in the reader handed through to the two-argument constructor
 *           (presumably the character stream to be normalized)
 */
public ICUNormalizer2CharFilter(Reader in) { this(in, Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)); }
/** Returns the combining class that the NFD {@code Normalizer2} instance reports for code point {@code c}. */
@Override int getValue(int c) { return Normalizer2.getNFDInstance().getCombiningClass(c); } },
switch (f) { case "NFC": normalizer = Normalizer2.getNFCInstance(); break; case "NFD": normalizer = Normalizer2.getNFDInstance(); break; case "NFKC": normalizer = Normalizer2.getNFKCInstance(); break; case "NFKD": normalizer = Normalizer2.getNFKDInstance(); break; default: return ensureValidString(cx, () -> normalizer.normalize(s));
/** * Returns the normalized form of the source string. * @param src source string * @return normalized src * @stable ICU 4.4 */ public String normalize(CharSequence src) { if(src instanceof String) { // Fastpath: Do not construct a new String if the src is a String // and is already normalized. int spanLength=spanQuickCheckYes(src); if(spanLength==src.length()) { return (String)src; } StringBuilder sb=new StringBuilder(src.length()).append(src, 0, spanLength); return normalizeSecondAndAppend(sb, src.subSequence(spanLength, src.length())).toString(); } return normalize(src, new StringBuilder(src.length())).toString(); }
@Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { if (normalizer.quickCheck(termAtt) != Normalizer.YES) { buffer.setLength(0); normalizer.normalize(termAtt, buffer); termAtt.setEmpty().append(buffer); } return true; } else { return false; } } }
/**
 * Normalizes the run of text that ends at {@code currentIndex} into
 * {@code buffer}, walking backwards code point by code point until one is
 * found that has a normalization boundary before it (or the iterator
 * returns a negative value).
 * <p>
 * Updates {@code nextIndex}, {@code currentIndex} and {@code bufferPos}.
 *
 * @return {@code true} if the normalized buffer is non-empty
 */
private boolean previousNormalize() {
    clearBuffer();
    nextIndex = currentIndex;
    text.setIndex(currentIndex);

    final StringBuilder pending = new StringBuilder();
    while (true) {
        final int cp = text.previousCodePoint();
        if (cp < 0) {
            break;
        }
        // Prepend, because the text is being read back to front.
        if (cp > 0xffff) {
            pending.insert(0, Character.toChars(cp));
        } else {
            pending.insert(0, (char) cp);
        }
        if (norm2.hasBoundaryBefore(cp)) {
            break;
        }
    }

    currentIndex = text.getIndex();
    norm2.normalize(pending, buffer);

    // Position the buffer cursor at the end of the normalized text.
    final int normalizedLength = buffer.length();
    bufferPos = normalizedLength;
    return normalizedLength != 0;
}
segment.appendCodePoint(c); start += Character.charCount(c); } while(start < limit && !norm2.hasBoundaryBefore(c = text.char32At(start))); if(start == limit && isIncremental && !norm2.hasBoundaryAfter(c)) { break; norm2.normalize(segment, normalized); if(!Normalizer2Impl.UTF16Plus.equal(segment, normalized)) {
if(sourceIndex >= nfdString.length()) { break; } sourceChar = Character.codePointAt(nfdString, sourceIndex); sourceCC = nfd.getCombiningClass(sourceChar); assert(sourceCC != 0); decompCC = nfd.getCombiningClass(decompChar); newNFDString.append(decomp, decompIndex, decomp.length()); assert(nfd.isNormalized(newNFDString)); assert(fcd.isNormalized(newString)); assert(nfd.normalize(newString).equals(newNFDString.toString())); // canonically equivalent return true;
Info info) { if(mappingStart==0) { uts46Norm2.normalize(src, dest); } else { uts46Norm2.normalizeSecondAndAppend(dest, src.subSequence(mappingStart, src.length()));
/**
 * Performs a quick check on a string to determine whether it is in a
 * particular normalization format.
 * <p>
 * One of three results is returned:
 * <ul>
 * <li>Normalizer.YES: the argument string is in the desired normalized format.</li>
 * <li>Normalizer.NO: the argument string is not in the desired normalized format.</li>
 * <li>Normalizer.MAYBE: a more thorough check is required; the caller may
 *     have to put the string in its normalized form and compare the results.</li>
 * </ul>
 *
 * @param source  string for determining if it is in a normalized format
 * @param mode    normalization format (Normalizer.NFC, Normalizer.NFD,
 *                Normalizer.NFKC, Normalizer.NFKD)
 * @param options Options for use with exclusion set and tailored Normalization.
 *                The only option that is currently recognized is UNICODE_3_2
 * @return Return code to specify if the text is normalized or not
 *         (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 */
@Deprecated
public static QuickCheckResult quickCheck(String source, Mode mode, int options) {
    // Resolve the mode/options pair to its normalizer, then delegate.
    final Normalizer2 normalizer = mode.getNormalizer2(options);
    return normalizer.quickCheck(source);
}
/**
 * Normalizes a single code point according to the given mode.
 *
 * @param char32  The input code point to be normalized.
 * @param mode    The normalization mode
 * @param options Options for use with exclusion set and tailored Normalization.
 *                The only option that is currently recognized is UNICODE_3_2
 * @return String The normalized string
 * @see #UNICODE_3_2
 * @deprecated ICU 56 Use {@link Normalizer2} instead.
 */
@Deprecated
public static String normalize(int char32, Mode mode, int options) {
    final boolean plainNfd = (mode == NFD) && (options == 0);
    if (!plainNfd) {
        // Every other mode/options pair goes through the String overload.
        return normalize(UTF16.valueOf(char32), mode, options);
    }

    // Shortcut for NFD without options: look the decomposition up directly;
    // a null result means the code point stands for itself.
    final String decomposition = Normalizer2.getNFCInstance().getDecomposition(char32);
    return decomposition != null ? decomposition : UTF16.valueOf(char32);
}
/** Builds a NormalizationTransliterator labelled "NFD" on top of {@code Normalizer2.getNFDInstance()}; the {@code ID} argument is not consulted. */
@Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("NFD", Normalizer2.getNFDInstance()); } });
/** Builds a NormalizationTransliterator labelled "NFKD" on top of {@code Normalizer2.getNFKDInstance()}; the {@code ID} argument is not consulted. */
@Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("NFKD", Normalizer2.getNFKDInstance()); } });
/** Builds a NormalizationTransliterator labelled "NFC" on top of {@code Normalizer2.getNFCInstance()}; the {@code ID} argument is not consulted. */
@Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("NFC", Normalizer2.getNFCInstance()); } });
/** Builds a NormalizationTransliterator labelled "NFKC" on top of {@code Normalizer2.getNFKCInstance()}; the {@code ID} argument is not consulted. */
@Override public Transliterator getInstance(String ID) { return new NormalizationTransliterator("NFKC", Normalizer2.getNFKCInstance()); } });
@Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { if (normalizer.quickCheck(termAtt) != Normalizer.YES) { buffer.setLength(0); normalizer.normalize(termAtt, buffer); termAtt.setEmpty().append(buffer); } return true; } else { return false; } } }
private boolean nextNormalize() { clearBuffer(); currentIndex=nextIndex; text.setIndex(nextIndex); // Skip at least one character so we make progress. int c=text.nextCodePoint(); if(c<0) { return false; } StringBuilder segment=new StringBuilder().appendCodePoint(c); while((c=text.nextCodePoint())>=0) { if(norm2.hasBoundaryBefore(c)) { text.moveCodePointIndex(-1); break; } segment.appendCodePoint(c); } nextIndex=text.getIndex(); norm2.normalize(segment, buffer); return buffer.length()!=0; }
/**
 * {@inheritDoc}
 * <p>
 * The input is walked as alternating spans obtained from the filter set,
 * starting with a {@code SIMPLE} span and then alternating with
 * {@code NOT_CONTAINED} spans. Only the {@code SIMPLE} spans are passed
 * to the wrapped normalizer's quick check; a {@code NO} for any of them
 * is returned immediately, and a {@code MAYBE} downgrades the overall
 * result.
 * @stable ICU 4.4
 */
@Override
public Normalizer.QuickCheckResult quickCheck(CharSequence s) {
    Normalizer.QuickCheckResult overall = Normalizer.YES;
    boolean checkedSpan = true; // true: SIMPLE span, false: NOT_CONTAINED span
    int start = 0;
    while (start < s.length()) {
        final UnicodeSet.SpanCondition condition = checkedSpan
                ? UnicodeSet.SpanCondition.SIMPLE
                : UnicodeSet.SpanCondition.NOT_CONTAINED;
        final int limit = set.span(s, start, condition);
        if (checkedSpan) {
            final Normalizer.QuickCheckResult spanResult =
                    norm2.quickCheck(s.subSequence(start, limit));
            if (spanResult == Normalizer.NO) {
                return spanResult;
            }
            if (spanResult == Normalizer.MAYBE) {
                overall = spanResult;
            }
        }
        // Alternate between the two kinds of span.
        checkedSpan = !checkedSpan;
        start = limit;
    }
    return overall;
}

/**
/**
 * Creates a builder on top of the given base tailoring.
 * <p>
 * Obtains the NFD and FCD normalizers and the NFC implementation, wraps the
 * base data's root elements, resets the per-build state, and initializes a
 * fresh {@code CollationDataBuilder} for tailoring against the base data.
 *
 * @param b the base tailoring; its {@code data} is used as the base data
 */
public CollationBuilder(CollationTailoring b) {
    // Normalization helpers.
    this.nfd = Normalizer2.getNFDInstance();
    this.fcd = Norm2AllModes.getFCDNormalizer2();
    this.nfcImpl = Norm2AllModes.getNFCInstance().impl;

    // Base tailoring and the data derived from it.
    this.base = b;
    this.baseData = b.data;
    this.rootElements = new CollationRootElements(b.data.rootElements);

    // Fresh per-build state.
    this.variableTop = 0;
    this.fastLatinEnabled = true;
    this.cesLength = 0;
    this.dataBuilder = new CollationDataBuilder();
    this.rootPrimaryIndexes = new UVector32();
    this.nodes = new UVector64();

    // Final initialization steps that rely on the fields assigned above.
    this.nfcImpl.ensureCanonIterData();
    this.dataBuilder.initForTailoring(this.baseData);
}