/**
 * Implementation of the UnicodeMatcher API. Unions the set of all
 * characters that may be matched by this object into the given set.
 *
 * @param toUnionTo the set into which the characters of this set are unioned
 * @stable ICU 2.2
 */
@Override
public void addMatchSetTo(UnicodeSet toUnionTo) {
    // This object is itself the match set, so it is added wholesale.
    toUnionTo.addAll(this);
}
/**
 * Adds the contents of the collection (as strings) into this UnicodeSet.
 * The collection must not contain null.
 *
 * @param source the collection whose elements are added
 * @return a reference to this object
 * @stable ICU 4.4
 */
public UnicodeSet add(Iterable<?> source) {
    // Pure delegation to the bulk-add overload.
    return addAll(source);
}
/**
 * Implements CollationRuleParser.Sink by merging the given set into
 * {@code optimizeSet}.
 *
 * @param set the characters to add to {@code optimizeSet}
 */
@Override
void optimize(UnicodeSet set) {
    optimizeSet.addAll(set);
}
/**
 * Adds more index characters (aside from what are in the locale).
 *
 * @param additions additional characters to add to the index, such as A-Z
 * @return this, for chaining
 * @stable ICU 4.8
 */
public AlphabeticIndex<V> addLabels(UnicodeSet additions) {
    initialLabels.addAll(additions);
    // Reset the buckets; presumably they are rebuilt later from the
    // updated labels -- confirm against the code that reads `buckets`.
    buckets = null;
    return this;
}
@Override public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { // intersect myFilter with the input filter UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); sourceSet.addAll(myFilter); // do nothing with the target } }
/**
 * Unions the set of all characters that may be output by this object
 * into the given set.
 *
 * @param toUnionTo the set into which the output characters are unioned
 */
@Override
public void addReplacementSetTo(UnicodeSet toUnionTo) {
    // The output characters are whatever the wrapped transliterator reports
    // as its target set.
    final UnicodeSet outputChars = translit.getTargetSet();
    toUnionTo.addAll(outputChars);
}
}
@Override public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); // Assume that it can modify any character to any other character sourceSet.addAll(myFilter); if (myFilter.size() != 0) { targetSet.addAll(0, 0x10FFFF); } } }
/**
 * Makes a set from each of the characters in the string.
 * Thus "ch" => {"c", "h"}
 *
 * @param s the source string
 * @return a newly created set containing the given characters
 * @stable ICU 2.0
 */
public static UnicodeSet fromAll(CharSequence s) {
    final UnicodeSet fresh = new UnicodeSet();
    return fresh.addAll(s);
}
@Override public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { sourceSet.addAll(getFilterAsUnicodeSet(inputFilter)); for (EscapeTransliterator it = this; it != null ; it = it.supplementalHandler) { if (inputFilter.size() != 0) { targetSet.addAll(it.prefix); targetSet.addAll(it.suffix); StringBuilder buffer = new StringBuilder(); for (int i = 0; i < it.radix; ++i) { Utility.appendNumber(buffer, i, it.radix, it.minDigits); } targetSet.addAll(buffer.toString()); // TODO drop once String is changed to CharSequence in UnicodeSet } } } }
/**
 * Computes source and target sets for {@code transliterator} from the
 * cached source characters and source strings.
 *
 * <p>Every cached source character that passes the effective filter is
 * added to {@code sourceSet}, and its transform is added to
 * {@code targetSet}. A source string is recorded only when the filter
 * contains all of it and transforming it actually changes it.
 *
 * @param transliterator supplies the effective filter
 * @param inputFilter    the caller's filter
 * @param sourceSet      receives the affected source characters and strings
 * @param targetSet      receives the transformed results
 */
public void addSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) {
    final UnicodeSet effectiveFilter = transliterator.getFilterAsUnicodeSet(inputFilter);
    final UnicodeSet affected = new UnicodeSet(sourceCache).retainAll(effectiveFilter);

    sourceSet.addAll(affected);
    for (String element : affected) {
        targetSet.addAll(transform.transform(element));
    }

    for (String candidate : sourceStrings) {
        if (!effectiveFilter.containsAll(candidate)) {
            continue;
        }
        String converted = transform.transform(candidate);
        if (candidate.equals(converted)) {
            // Unchanged by the transform: contributes nothing.
            continue;
        }
        targetSet.addAll(converted);
        sourceSet.addAll(candidate);
    }
}
}
@Override public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); // Doesn't actually modify the source characters, so leave them alone. // add the characters inserted if (myFilter.size() != 0) { targetSet.addAll(insertion); } }
@Override public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); if (myFilter.size() > 0) { sourceSet.addAll(myFilter); targetSet.addAll('0', '9') .addAll('A', 'Z') .add('-') .add(' ') .addAll(OPEN_DELIM) .add(CLOSE_DELIM) .addAll('a', 'z') // for controls .add('<').add('>') // for controls .add('(').add(')') // for controls ; } } }
/**
 * Asks every rule in {@code ruleVector}, in order, to contribute to the
 * source and target sets.
 *
 * <p>A private copy of {@code filter} is used as the working filter. A
 * scratch set is cleared before each rule, and whatever the rule leaves in
 * it is merged into the working filter, so later rules see it.
 *
 * @param filter    the initial filter; it is copied, not modified
 * @param sourceSet receives source characters from the rules
 * @param targetSet receives target characters from the rules
 */
void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
    final UnicodeSet workingFilter = new UnicodeSet(filter);
    final UnicodeSet scratch = new UnicodeSet();
    final int ruleCount = ruleVector.size();
    int index = 0;
    while (index < ruleCount) {
        TransliterationRule rule = ruleVector.get(index);
        rule.addSourceTargetSet(workingFilter, sourceSet, targetSet, scratch.clear());
        workingFilter.addAll(scratch);
        ++index;
    }
}
/**
 * Creates a word-kind break engine, loads the dictionary registered under
 * "Hira", and selects the characters this engine handles.
 *
 * @param korean {@code true} to handle the Hangul word set; {@code false}
 *               to handle the union of the Han, Katakana and Hiragana word
 *               sets plus the two prolonged sound marks
 * @throws IOException declared by this constructor; presumably raised when
 *                     the dictionary cannot be loaded -- confirm against
 *                     {@code DictionaryData.loadDictionaryFor}
 */
public CjkBreakEngine(boolean korean) throws IOException {
    super(BreakIterator.KIND_WORD);
    fDictionary = DictionaryData.loadDictionaryFor("Hira");
    if (!korean) {
        // Chinese and Japanese
        UnicodeSet cjSet = new UnicodeSet();
        cjSet.addAll(fHanWordSet);
        cjSet.addAll(fKatakanaWordSet);
        cjSet.addAll(fHiraganaWordSet);
        cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
        cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
        setCharacters(cjSet);
    } else {
        setCharacters(fHangulWordSet);
    }
}
private void addScriptChars(ULocale locale, UnicodeSet allowedChars) { int scripts[] = UScript.getCode(locale); if (scripts != null) { UnicodeSet tmpSet = new UnicodeSet(); for (int i = 0; i < scripts.length; i++) { tmpSet.applyIntPropertyValue(UProperty.SCRIPT, scripts[i]); allowedChars.addAll(tmpSet); } } // else it's an unknown script. // Maybe they asked for the script of "zxx", which refers to no linguistic content. // Maybe they asked for the script of a newer locale that we don't know in the older version of ICU. }
/** * @see #retainAll(com.ibm.icu.text.UnicodeSet) * @stable ICU 4.4 */ public <T extends CharSequence> UnicodeSet retainAll(Iterable<T> collection) { checkFrozen(); // TODO optimize UnicodeSet toRetain = new UnicodeSet(); toRetain.addAll(collection); retainAll(toRetain); return this; }
/** * @internal */ @Override public void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) { UnicodeSet myFilter = new UnicodeSet(getFilterAsUnicodeSet(filter)); UnicodeSet tempTargetSet = new UnicodeSet(); for (int i=0; i<trans.length; ++i) { // each time we produce targets, those can be used by subsequent items, despite the filter. // so we get just those items, and add them to the filter each time. tempTargetSet.clear(); trans[i].addSourceTargetSet(myFilter, sourceSet, tempTargetSet); targetSet.addAll(tempTargetSet); myFilter.addAll(tempTargetSet); } }
/**
 * Builds the set difference of two UnicodeSet patterns.
 *
 * @param pattern1 pattern for the characters to start from
 * @param pattern2 pattern for the characters to take away
 * @return a new set holding what {@code pattern1} describes minus what
 *         {@code pattern2} describes
 */
private static UnicodeSet subUnicodeSet(String pattern1, String pattern2) {
    // Start from the first pattern's contents, then subtract the second.
    UnicodeSet difference = new UnicodeSet(pattern1);
    difference.removeAll(new UnicodeSet(pattern2));
    return difference;
}
/**
 * Builds the set difference of two UnicodeSet patterns.
 *
 * @param pattern1 pattern for the characters to start from
 * @param pattern2 pattern for the characters to take away
 * @return a new set holding what {@code pattern1} describes minus what
 *         {@code pattern2} describes
 */
private static UnicodeSet subUnicodeSet(final String pattern1, final String pattern2) {
    final UnicodeSet included = new UnicodeSet(pattern1);
    final UnicodeSet excluded = new UnicodeSet(pattern2);

    final UnicodeSet result = new UnicodeSet();
    result.addAll(included);
    result.removeAll(excluded);
    return result;
}
/**
 * Lazily builds {@code PNJ_CONSONANT_SET} and {@code PNJ_BINDI_TIPPI_SET}.
 * Returns immediately when both are already non-null.
 *
 * <p>{@code PNJ_CONSONANT_SET} holds the ranges U+0A15..U+0A28,
 * U+0A2A..U+0A30, U+0A35..U+0A36 and U+0A38..U+0A39.
 * {@code PNJ_BINDI_TIPPI_SET} holds all of those plus U+0A05, U+0A07,
 * U+0A41..U+0A42 and U+0A3F.
 *
 * <p>Both sets are fully populated and compacted in local variables and
 * only then stored in the fields. That way the non-null guard above never
 * passes while a set is still empty or partly filled.
 * NOTE(review): the upper-case names suggest shared static fields; if so,
 * confirm whether stronger publication (e.g. a static initializer) is wanted.
 */
private void initializePNJSets() {
    if (PNJ_BINDI_TIPPI_SET != null && PNJ_CONSONANT_SET != null) {
        return;
    }

    final UnicodeSet consonants = new UnicodeSet();
    consonants.add(0x0a15, 0x0a28);
    consonants.add(0x0a2a, 0x0a30);
    consonants.add(0x0a35, 0x0a36);
    consonants.add(0x0a38, 0x0a39);

    final UnicodeSet bindiTippi = new UnicodeSet();
    bindiTippi.addAll(consonants);
    bindiTippi.add(0x0a05);
    bindiTippi.add(0x0a07);
    bindiTippi.add(0x0a41, 0x0a42);
    bindiTippi.add(0x0a3f);

    consonants.compact();
    bindiTippi.compact();

    // Publish only after both sets are complete.
    PNJ_CONSONANT_SET = consonants;
    PNJ_BINDI_TIPPI_SET = bindiTippi;
}