/**
 * Collects the contractions and/or expansions of this collator into the
 * supplied Unicode sets.
 *
 * <p>Each non-null set is emptied first; a null set is simply skipped.
 *
 * @param contractions
 *            if not null, set to contain contractions
 * @param expansions
 *            if not null, set to contain expansions
 * @param addPrefixes
 *            add the prefix contextual elements to contractions
 * @throws Exception
 *             Throws an exception if any errors occurs.
 * @stable ICU 3.4
 */
public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions,
        boolean addPrefixes) throws Exception {
    // Start from empty output sets; either one may be omitted by passing null.
    if (contractions != null) {
        contractions.clear();
    }
    if (expansions != null) {
        expansions.clear();
    }

    ContractionsAndExpansions collector =
            new ContractionsAndExpansions(contractions, expansions, null, addPrefixes);
    collector.forData(data);
}
private int processSet(String regex, int i, StringBuilder result, UnicodeSet temp, ParsePosition pos) { try { pos.setIndex(i); UnicodeSet x = temp.clear().applyPattern(regex, pos, symbolTable, 0); x.complement().complement(); // hack to fix toPattern result.append(x.toPattern(false)); i = pos.getIndex() - 1; // allow for the loop increment return i; } catch (Exception e) { throw (IllegalArgumentException) new IllegalArgumentException("Error in " + regex).initCause(e); } }
/**
 * Returns the set of Unicode code points that can be converted by an ICU Converter.
 *
 * <p>The current implementation returns only one kind of set (UCNV_ROUNDTRIP_SET): The set of all Unicode code points that can be
 * roundtrip-converted (converted without any data loss) with the converter. This set will not include code points that have fallback
 * mappings or are only the result of reverse fallback mappings. See UTR #22 "Character Mapping Markup Language" at <a href="http://www.unicode.org/reports/tr22/">http://www.unicode.org/reports/tr22/</a>
 *
 * <p>In the future, there may be more UConverterUnicodeSet choices to select sets with different properties.
 *
 * <p>This is useful for example for
 * <ul><li>checking that a string or document can be roundtrip-converted with a converter,
 *   without/before actually performing the conversion</li>
 * <li>testing if a converter can be used for typical text for a certain locale,
 *   by comparing its roundtrip set with the set of ExemplarCharacters from
 *   ICU's locale data or other sources</li></ul>
 *
 * @param setFillIn A valid UnicodeSet. It will be cleared by this function before
 *                   the converter's specific set is filled in.
 * @param which A selector; currently ROUNDTRIP_SET is the only supported value.
 * @throws IllegalArgumentException if <code>setFillIn</code> is null or <code>which</code>
 *                   is not ROUNDTRIP_SET.
 * @stable ICU 4.0
 */
public void getUnicodeSet(UnicodeSet setFillIn, int which) {
    // Validate before touching setFillIn so a rejected call leaves it unmodified.
    if (setFillIn == null) {
        throw new IllegalArgumentException("setFillIn must not be null");
    }
    if (which != ROUNDTRIP_SET) {
        throw new IllegalArgumentException("Unsupported set selector: " + which);
    }
    setFillIn.clear();
    getUnicodeSetImpl(setFillIn, which);
}
/** * Computes the set of numerics for a string, according to UTS 39 section 5.3. */ private void getNumerics(String input, UnicodeSet result) { result.clear(); for (int utf16Offset = 0; utf16Offset < input.length();) { int codePoint = Character.codePointAt(input, utf16Offset); utf16Offset += Character.charCount(codePoint); // Store a representative character for each kind of decimal digit if (UCharacter.getType(codePoint) == UCharacterCategory.DECIMAL_DIGIT_NUMBER) { // Store the zero character as a representative for comparison. // Unicode guarantees it is codePoint - value result.add(codePoint - UCharacter.getNumericValue(codePoint)); } } }
/**
 * Make this object represent the range <code>start - end</code>.
 * If <code>start > end</code> then this object is set to an
 * empty range.
 *
 * @param start first character in the set, inclusive
 * @param end last character in the set, inclusive
 * @return this object, for chaining
 * @stable ICU 2.0
 */
public UnicodeSet set(int start, int end) {
    // checkFrozen() runs before anything is modified.
    checkFrozen();
    clear();
    // Complementing [start, end] on the now-empty set yields exactly that range.
    // NOTE(review): the empty result for start > end relies on complement(start, end)
    // leaving the set untouched for an inverted range - confirm against complement().
    complement(start, end);
    return this;
}
/**
 * Asks every rule in {@code ruleVector}, in order, to contribute to the
 * source and target sets.
 *
 * <p>A private copy of {@code filter} is used as the working filter. Each
 * rule is also handed a scratch set that is cleared before the call;
 * whatever the rule leaves in that scratch set is added to the working
 * filter before the next rule runs. The caller's {@code filter} is not
 * modified by this method itself.
 *
 * @param filter    initial filter; copied, not modified here
 * @param sourceSet passed through to each rule
 * @param targetSet passed through to each rule
 */
void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) {
    UnicodeSet workingFilter = new UnicodeSet(filter);
    UnicodeSet scratch = new UnicodeSet();
    final int ruleCount = ruleVector.size();
    int index = 0;
    while (index < ruleCount) {
        TransliterationRule rule = ruleVector.get(index);
        rule.addSourceTargetSet(workingFilter, sourceSet, targetSet, scratch.clear());
        // Widen the filter for later rules with what this rule reported.
        workingFilter.addAll(scratch);
        index++;
    }
}
/**
 * Retain the specified string in this set if it is present.
 * Upon return this set will be empty if it did not contain s, or
 * will only contain s if it did contain s.
 * @param cs the string to be retained
 * @return this object, for chaining
 * @stable ICU 2.0
 */
public final UnicodeSet retain(CharSequence cs) {
    int cp = getSingleCP(cs);
    if (cp < 0) {
        // A negative result is handled as a string element rather than a code point.
        String s = cs.toString();
        boolean isIn = strings.contains(s);
        if (isIn && size() == 1) {
            // The set already consists of exactly this string; nothing to change.
            return this;
        }
        clear();
        // Put the string back only if it was a member before; otherwise the
        // documented result is the empty set.
        if (isIn) {
            strings.add(s);
        }
        pat = null;
    } else {
        retain(cp, cp);
    }
    return this;
}
/** * Converts the char set cset into a Unicode set uset. * Equivalent to charSetToUSet. * @param set Set of 256 bit flags corresponding to a set of chars. * @param uset USet to receive characters. Existing contents are deleted. */ private void convert(int set[], UnicodeSet uset) { uset.clear(); if (!initNameSetsLengths()) { return; } // build a char string with all chars that are used in character names for (char c = 255; c > 0; c --) { if (contains(set, c)) { uset.add(c); } } } }
/**
 * Retain only the elements in this set that are contained in the
 * specified range. If <code>start > end</code> then an empty range is
 * retained, leaving the set empty.
 *
 * @param start first character, inclusive, of range to be retained
 * to this set.
 * @param end last character, inclusive, of range to be retained
 * to this set.
 * @return this object, for chaining
 * @throws IllegalArgumentException if <code>start</code> or <code>end</code>
 * lies outside <code>MIN_VALUE..MAX_VALUE</code>
 * @stable ICU 2.0
 */
public UnicodeSet retain(int start, int end) {
    checkFrozen();
    // Both bounds are validated, start first, even when the range is inverted.
    if (start < MIN_VALUE || start > MAX_VALUE) {
        throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(start, 6));
    }
    if (end < MIN_VALUE || end > MAX_VALUE) {
        throw new IllegalArgumentException("Invalid code point U+" + Utility.hex(end, 6));
    }
    if (start > end) {
        // An inverted range is empty, so nothing in the set survives.
        clear();
    } else {
        retain(range(start, end), 2, 0);
    }
    return this;
}
fAllowedCharsSet.clear();
/** * @internal */ @Override public void addSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet) { UnicodeSet myFilter = new UnicodeSet(getFilterAsUnicodeSet(filter)); UnicodeSet tempTargetSet = new UnicodeSet(); for (int i=0; i<trans.length; ++i) { // each time we produce targets, those can be used by subsequent items, despite the filter. // so we get just those items, and add them to the filter each time. tempTargetSet.clear(); trans[i].addSourceTargetSet(myFilter, sourceSet, tempTargetSet); targetSet.addAll(tempTargetSet); myFilter.addAll(tempTargetSet); } }
return false; set.clear(); int value=canonValue&CANON_VALUE_MASK; if((canonValue&CANON_HAS_SET)!=0) {
firstNonspacingMark = 0; if (haveMultipleMarks) { marksSeenSoFar.clear(); haveMultipleMarks = false;
throw new IllegalArgumentException("Invalid character name"); clear(); add_unchecked(ch); return this;
clear(); String lastString = null;
temp.clear(); matcher.addMatchSetTo(temp); if (!filter.containsSome(temp)) {