if (set.isEmpty()) { throw new UnsupportedOperationException( "AlphabeticIndex requires script-first-primary contractions");
/**
 * Creates a new ICUFoldingFilterFactory.
 *
 * <p>Starts from {@code ICUFoldingFilter.NORMALIZER}. When a {@code "filter"}
 * argument is present it is parsed as a {@link UnicodeSet} pattern; a
 * non-empty set is frozen and used to wrap the normalizer in a
 * {@link FilteredNormalizer2}.
 *
 * @param args factory arguments
 * @throws IllegalArgumentException if {@code args} is not empty once the
 *         {@code "filter"} argument has been fetched
 */
public ICUFoldingFilterFactory(Map<String,String> args) {
  super(args);

  Normalizer2 chosen = ICUFoldingFilter.NORMALIZER;
  final String pattern = get(args, "filter");
  if (pattern != null) {
    final UnicodeSet restriction = new UnicodeSet(pattern);
    // An empty set leaves the default normalizer in place.
    if (!restriction.isEmpty()) {
      restriction.freeze();
      chosen = new FilteredNormalizer2(chosen, restriction);
    }
  }

  final boolean hasLeftovers = !args.isEmpty();
  if (hasLeftovers) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }

  this.normalizer = chosen;
}
/** * Add Chinese index characters from the tailoring. */ private boolean addChineseIndexCharacters() { UnicodeSet contractions = new UnicodeSet(); try { collatorPrimaryOnly.internalAddContractions(BASE.charAt(0), contractions); } catch (Exception e) { return false; } if (contractions.isEmpty()) { return false; } initialLabels.addAll(contractions); for (String s : contractions) { assert(s.startsWith(BASE)); char c = s.charAt(s.length() - 1); if (0x41 <= c && c <= 0x5A) { // A-Z // There are Pinyin labels, add ASCII A-Z labels as well. initialLabels.add(0x41, 0x5A); // A-Z break; } } return true; }
/**
 * Builds an ICUTransformFilter that transforms the text of the given stream.
 *
 * <p>If the transliterator has no filter yet and is a
 * {@code RuleBasedTransliterator}, its non-empty source set is installed as
 * its filter.
 *
 * @param input {@link TokenStream} to filter.
 * @param transform Transliterator to transform the text.
 */
public ICUTransformFilter(TokenStream input, Transliterator transform) {
  super(input);
  this.transform = transform;

  /*
   * Shortcut noted by the original author as "cheating", but a large
   * speed-up; package-private APIs could probably do better still.
   */
  if (transform.getFilter() != null) {
    return;
  }
  if (!(transform instanceof com.ibm.icu.text.RuleBasedTransliterator)) {
    return;
  }
  final UnicodeSet sources = transform.getSourceSet();
  if (sources == null || sources.isEmpty()) {
    return;
  }
  transform.setFilter(sources);
}
/**
 * Constructs an ICUTransformFilter over the given stream.
 *
 * <p>A {@code RuleBasedTransliterator} that has no filter set receives its
 * own source set as filter, provided that set is non-null and non-empty.
 *
 * @param input {@link TokenStream} to filter.
 * @param transform Transliterator to transform the text.
 */
public ICUTransformFilter(TokenStream input, Transliterator transform) {
  super(input);
  this.transform = transform;

  /*
   * The original author calls this cheating, but it speeds things up a lot.
   * With pkg-private APIs available this could probably be done better.
   */
  final boolean lacksFilter = transform.getFilter() == null;
  if (lacksFilter) {
    if (transform instanceof com.ibm.icu.text.RuleBasedTransliterator) {
      final UnicodeSet handled = transform.getSourceSet();
      final boolean usable = handled != null && !handled.isEmpty();
      if (usable) {
        transform.setFilter(handled);
      }
    }
  }
}
/**
 * Creates a new ICUNormalizer2CharFilterFactory.
 *
 * <p>Reads {@code "name"} (default {@code "nfkc_cf"}) and {@code "mode"}
 * ({@code "compose"} or {@code "decompose"}, default {@code "compose"}) to
 * obtain a {@link Normalizer2}. When a {@code "filter"} argument is present
 * it is parsed as a {@link UnicodeSet} pattern; a non-empty set is frozen and
 * used to wrap the normalizer in a {@link FilteredNormalizer2}.
 *
 * @param args factory arguments
 * @throws IllegalArgumentException if {@code args} is not empty once the
 *         arguments above have been fetched
 */
public ICUNormalizer2CharFilterFactory(Map<String,String> args) {
  super(args);

  final String form = get(args, "name", "nfkc_cf");
  final String direction = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");

  final Normalizer2.Mode icuMode;
  if ("compose".equals(direction)) {
    icuMode = Normalizer2.Mode.COMPOSE;
  } else {
    icuMode = Normalizer2.Mode.DECOMPOSE;
  }
  Normalizer2 chosen = Normalizer2.getInstance(null, form, icuMode);

  final String pattern = get(args, "filter");
  if (pattern != null) {
    final UnicodeSet restriction = new UnicodeSet(pattern);
    // An empty set means the unfiltered normalizer is kept.
    if (!restriction.isEmpty()) {
      restriction.freeze();
      chosen = new FilteredNormalizer2(chosen, restriction);
    }
  }

  final boolean hasLeftovers = !args.isEmpty();
  if (hasLeftovers) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }

  this.normalizer = chosen;
}
/**
 * Creates a new ICUNormalizer2FilterFactory.
 *
 * <p>The {@code "name"} argument (default {@code "nfkc_cf"}) and the
 * {@code "mode"} argument ({@code "compose"} or {@code "decompose"}, default
 * {@code "compose"}) select the {@link Normalizer2} instance. An optional
 * {@code "filter"} argument is parsed as a {@link UnicodeSet} pattern; if the
 * resulting set is non-empty it is frozen and the normalizer is wrapped in a
 * {@link FilteredNormalizer2}.
 *
 * @param args factory arguments
 * @throws IllegalArgumentException if {@code args} is not empty once the
 *         arguments above have been fetched
 */
public ICUNormalizer2FilterFactory(Map<String,String> args) {
  super(args);

  final String normalizerName = get(args, "name", "nfkc_cf");
  final String modeName = get(args, "mode", Arrays.asList("compose", "decompose"), "compose");

  final boolean composing = "compose".equals(modeName);
  final Normalizer2.Mode selectedMode;
  if (composing) {
    selectedMode = Normalizer2.Mode.COMPOSE;
  } else {
    selectedMode = Normalizer2.Mode.DECOMPOSE;
  }
  Normalizer2 result = Normalizer2.getInstance(null, normalizerName, selectedMode);

  final String filterPattern = get(args, "filter");
  if (filterPattern != null) {
    final UnicodeSet filterSet = new UnicodeSet(filterPattern);
    // Only a non-empty set restricts the normalizer.
    if (!filterSet.isEmpty()) {
      filterSet.freeze();
      result = new FilteredNormalizer2(result, filterSet);
    }
  }

  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }

  this.normalizer = result;
}
if (uset.isEmpty()) {
/**
 * For each code point of {@code set} whose trie value is
 * {@code Collation.FALLBACK_CE32}, copies the base CE32 (with context) into
 * this builder's trie.
 *
 * <p>An empty set is a no-op. Otherwise iteration ends as soon as the
 * iterator reports {@code UnicodeSetIterator.IS_STRING}, and the builder is
 * flagged as modified afterwards.
 *
 * @param set the code points to process
 */
void optimize(UnicodeSet set) {
    if (set.isEmpty()) {
        return;
    }
    for (UnicodeSetIterator it = new UnicodeSetIterator(set);
            it.next() && it.codepoint != UnicodeSetIterator.IS_STRING;) {
        final int cp = it.codepoint;
        if (trie.get(cp) != Collation.FALLBACK_CE32) {
            // Only fallback entries are touched here.
            continue;
        }
        final int baseCE32 = base.getFinalCE32(base.getCE32(cp));
        final int copied = copyFromBaseCE32(cp, baseCE32, true);
        trie.set(cp, copied);
    }
    modified = true;
}
if (mustNotBeEmpty && isEmpty()) {
void suppressContractions(UnicodeSet set) { if(set.isEmpty()) { return; } UnicodeSetIterator iter = new UnicodeSetIterator(set); while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) { int c = iter.codepoint; int ce32 = trie.get(c); if(ce32 == Collation.FALLBACK_CE32) { ce32 = base.getFinalCE32(base.getCE32(c)); if(Collation.ce32HasContext(ce32)) { ce32 = copyFromBaseCE32(c, ce32, false /* without context */); trie.set(c, ce32); } } else if(isBuilderContextCE32(ce32)) { ce32 = getConditionalCE32ForCE32(ce32).ce32; // Simply abandon the list of ConditionalCE32. // The caller will copy this builder in the end, // eliminating unreachable data. trie.set(c, ce32); contextChars.remove(c); } } modified = true; }
if (!excludedCodePoints.isEmpty()) { int itemCount = excludedCodePoints.getRangeCount(); for (int j = 0; j < itemCount; ++j) {