com.ibm.icu.text.RuleBasedCollator java code examples

collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
collator.setNumericCollation(numeric);
switch (caseFirst) {
case "upper":
  collator.setUpperCaseFirst(true);
  break;
case "lower":
  collator.setLowerCaseFirst(true);
  break;
case "false":
  if (collator.isLowerCaseFirst()) {
    collator.setLowerCaseFirst(false);
  if (collator.isUpperCaseFirst()) {
    collator.setUpperCaseFirst(false);
  collator.setStrength(Collator.PRIMARY);
  break;
case "accent":
  collator.setStrength(Collator.SECONDARY);
  break;
case "case":
  collator.setStrength(Collator.PRIMARY);
  collator.setCaseLevel(true);
  break;
case "variant":
  collator.setStrength(Collator.TERTIARY);
  break;
default:

  String rules = collator.getRules() + extras;
  collator = new RuleBasedCollator(rules);
collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);

/**
 * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the
 * result into the user provided argument key. If key has an internal byte array of length that's too small for the
 * result, the internal byte array will be grown to the exact required size.
 *
 * <p>A collation buffer is acquired for the duration of the call and is always handed back through
 * {@code releaseCollationBuffer}, even when key generation throws.
 *
 * @param source the text String to be transformed into a RawCollationKey
 * @param key output RawCollationKey to store results
 * @return {@code null} when source is {@code null}. Otherwise, if key is null, a new instance of RawCollationKey
 *         will be created and returned, otherwise the user provided key will be returned.
 * @see #getCollationKey
 * @see #compare(String, String)
 * @see RawCollationKey
 * @stable ICU 2.8
 */
@Override
public RawCollationKey getRawCollationKey(String source, RawCollationKey key) {
  if (source != null) {
    // Stays null if acquisition itself fails; the release call still runs in that case.
    CollationBuffer scratch = null;
    try {
      scratch = getCollationBuffer();
      return getRawCollationKey(source, key, scratch);
    } finally {
      releaseCollationBuffer(scratch);
    }
  }
  // Nothing to transform.
  return null;
}

  log.info("strength=" + str);
collator.setStrength(str);
  collator.setStrength(Collator.PRIMARY);
  break;
  collator.setStrength(Collator.SECONDARY);
  break;
  collator.setStrength(Collator.TERTIARY);
  break;
  collator.setStrength(Collator.QUATERNARY);
  break;
  collator.setStrength(Collator.IDENTICAL);
  break;
collator.setDecomposition(Collator.NO_DECOMPOSITION);
break;
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
break;
collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
break;

    collator = new RuleBasedCollator(rules);
  } catch (Exception e) {
    if (failureToResolve != null) {
if (alternate != null) {
  if (alternate.equalsIgnoreCase("shifted")) {
    rbc.setAlternateHandlingShifted(true);
  } else if (alternate.equalsIgnoreCase("non-ignorable")) {
    rbc.setAlternateHandlingShifted(false);
  } else {
    throw new IllegalArgumentException("Invalid alternate: " + alternate);
  rbc.setCaseLevel(caseLevel);
if (caseFirst != null) {
  if (caseFirst.equalsIgnoreCase("lower")) {
    rbc.setLowerCaseFirst(true);
  } else if (caseFirst.equalsIgnoreCase("upper")) {
    rbc.setUpperCaseFirst(true);
  } else {
    throw new IllegalArgumentException("Invalid caseFirst: " + caseFirst);
  rbc.setNumericCollation(numeric);
  rbc.setVariableTop(variableTop);
  rbc.setHiraganaQuaternary(hiraganaQuaternaryMode);

if (value != null) {
  if (rbc != null) {
    rbc.setFrenchCollation(getYesOrNo("colBackwards", value));
  } else {
    throw new UnsupportedOperationException(
if (value != null) {
  if (rbc != null) {
    rbc.setCaseLevel(getYesOrNo("colCaseLevel", value));
  } else {
    throw new UnsupportedOperationException(
    int cf = getIntValue("colCaseFirst", value, "no", "lower", "upper");
    if (cf == 0) {
      rbc.setLowerCaseFirst(false);
      rbc.setUpperCaseFirst(false);
    } else if (cf == 1) {
      rbc.setLowerCaseFirst(true);
    } else /* cf == 2 */ {
      rbc.setUpperCaseFirst(true);
if (value != null) {
  if (rbc != null) {
    rbc.setAlternateHandlingShifted(
        getIntValue("colAlternate", value, "non-ignorable", "shifted") != 0);
  } else {
if (value != null) {
  if (rbc != null) {

    .getInstance(locale);
collator.setStrength(Collator.TERTIARY);
collator.setDecomposition(Collator.NO_DECOMPOSITION);
collator.getRawCollationKey(input, raw);

collatorOriginal = collator != null ? collator : (RuleBasedCollator) Collator.getInstance(locale);
try {
  collatorPrimaryOnly = collatorOriginal.cloneAsThawed();
} catch (Exception e) {
collatorPrimaryOnly.setStrength(Collator.PRIMARY);
collatorPrimaryOnly.freeze();
        "AlphabeticIndex requires some non-ignorable script boundary strings");
  if (collatorPrimaryOnly.compare(firstCharsInScripts.get(0), "") == 0) {
    firstCharsInScripts.remove(0);
  } else {

  /**
   * Finds the first occurrence of {@code needle} in {@code haystack} using a collation-based
   * search at {@link Collator#PRIMARY} strength.
   *
   * <p>At primary strength only base-letter differences are significant, so the match ignores
   * case differences (and, as a consequence of the chosen strength, accent differences too).
   *
   * @param haystack the text to search in
   * @param needle the pattern to look for
   * @return the value of {@code StringSearch.first()}: the start index of the first match, or the
   *         iterator's "done" value when there is no match
   */
  public static int indexOfIgnoreCase(String haystack, String needle) {
    StringSearch stringSearch = new StringSearch(needle, haystack);
    stringSearch.getCollator().setStrength(Collator.PRIMARY);
    // The search object caches collator attributes at construction time. ICU requires a
    // reset() (or setCollator()) after the collator returned by getCollator() is modified,
    // otherwise the new strength may not be honoured by the search.
    stringSearch.reset();
    return stringSearch.first();
  }
}

  /**
   * Appends the raw collation key bytes for {@code s} to {@code keyBuilder}.
   *
   * <p>The key is generated into the {@code raw} object declared outside this method rather than
   * into a freshly allocated one.
   *
   * @param keyBuilder the builder that receives the sort key bytes
   * @param s the string whose sort key is appended
   */
  public void appendSortKey(final KeyBuilder keyBuilder, final String s) {
    // Populate the reusable raw key in place.
    collator.getRawCollationKey(s, raw);

    // The last byte of the raw key is the nul terminator; leave it out of the appended range.
    final int lengthWithoutNul = raw.size - 1;
    keyBuilder.append(raw.bytes, 0, lengthWithoutNul);
  }

/**
 * Uses a Locale dependent Collator to generate a sort string.
 *
 * <p>Every collation element of {@code str} is written as exactly eight lowercase hexadecimal
 * digits of its unsigned 32-bit value. The fixed width keeps element boundaries unambiguous and
 * makes a plain text comparison of two results agree with an element-by-element unsigned
 * comparison. (A variable-width, signed rendering would let {@code 0x10,0x2} and
 * {@code 0x1,0x02} collide and would prefix negative elements with {@code '-'}.)
 *
 * <p>NOTE(review): strings produced by an earlier variable-width format are not comparable with
 * the strings produced here; regenerate any stored values.
 *
 * @param str The string to parse
 * @return the sort ordering text, or {@code str} unchanged when no collator is available
 */
@Override
public String filter(String str) {
  RuleBasedCollator collator = getCollator();
  // Without a collator there is nothing to transform.
  if (collator == null) {
    return str;
  }

  StringBuilder buf = new StringBuilder();
  // Iterate through the collation elements of the input.
  CollationElementIterator iter = collator.getCollationElementIterator(str);
  int element;
  while ((element = iter.next()) != CollationElementIterator.NULLORDER) {
    // toHexString renders the int as unsigned; left-pad with zeros to a fixed 8 digits.
    String hex = Integer.toHexString(element);
    for (int pad = hex.length(); pad < 8; pad++) {
      buf.append('0');
    }
    buf.append(hex);
  }
  return buf.toString();
}

/**
 * Builds a collator for the requested locale from its loaded tailoring.
 *
 * @param desiredLocale the locale whose tailoring is requested from {@code CollationLoader}
 * @return a new {@code RuleBasedCollator} built from the loaded tailoring and the value held by
 *         the valid-locale output after loading
 */
private static final Collator makeInstance(ULocale desiredLocale) {
  // Output holder passed to the loader; it starts out as ROOT.
  final Output<ULocale> resolvedLocale = new Output<ULocale>(ULocale.ROOT);
  final CollationTailoring tailoring =
      CollationLoader.loadTailoring(desiredLocale, resolvedLocale);
  return new RuleBasedCollator(tailoring, resolvedLocale.value);
}

    collator = new RuleBasedCollator(rules);
  } catch (Exception e) {
    if (failureToResolve != null) {
if (alternate != null) {
  if (alternate.equalsIgnoreCase("shifted")) {
    rbc.setAlternateHandlingShifted(true);
  } else if (alternate.equalsIgnoreCase("non-ignorable")) {
    rbc.setAlternateHandlingShifted(false);
  } else {
    throw new ElasticsearchIllegalArgumentException("Invalid alternate: " + alternate);
  rbc.setCaseLevel(caseLevel);
if (caseFirst != null) {
  if (caseFirst.equalsIgnoreCase("lower")) {
    rbc.setLowerCaseFirst(true);
  } else if (caseFirst.equalsIgnoreCase("upper")) {
    rbc.setUpperCaseFirst(true);
  } else {
    throw new ElasticsearchIllegalArgumentException("Invalid caseFirst: " + caseFirst);
  rbc.setNumericCollation(numeric);
  rbc.setVariableTop(variableTop);
  rbc.setHiraganaQuaternary(hiraganaQuaternaryMode);

    .getInstance(locale);
collator.setStrength(Collator.TERTIARY);
collator.setDecomposition(Collator.NO_DECOMPOSITION);
collator.getRawCollationKey(input, raw);

  log.info("strength=" + str);
collator.setStrength(str);
  collator.setStrength(Collator.PRIMARY);
  break;
  collator.setStrength(Collator.SECONDARY);
  break;
  collator.setStrength(Collator.TERTIARY);
  break;
  collator.setStrength(Collator.QUATERNARY);
  break;
  collator.setStrength(Collator.IDENTICAL);
  break;
collator.setDecomposition(Collator.NO_DECOMPOSITION);
break;
collator.setDecomposition(Collator.FULL_DECOMPOSITION);
break;
collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
break;

  /**
   * Appends the raw collation key bytes for {@code s} to {@code keyBuilder}.
   *
   * <p>The key is generated into the {@code raw} object declared outside this method rather than
   * into a freshly allocated one.
   *
   * @param keyBuilder the builder that receives the sort key bytes
   * @param s the string whose sort key is appended
   */
  public void appendSortKey(final KeyBuilder keyBuilder, final String s) {
    // Populate the reusable raw key in place.
    collator.getRawCollationKey(s, raw);

    // The last byte of the raw key is the nul terminator; leave it out of the appended range.
    final int lengthWithoutNul = raw.size - 1;
    keyBuilder.append(raw.bytes, 0, lengthWithoutNul);
  }

/**
 * Reports whether every collation element of {@code s} has a primary order of zero.
 *
 * @param s the string to inspect
 * @return {@code true} if the iterator is exhausted without meeting an element whose primary
 *         order is non-zero (including when it yields no elements at all); {@code false}
 *         otherwise
 */
public boolean allIgnorable(String s) {
  CollationElementIterator elements = collator.getCollationElementIterator(s);
  for (int ce = elements.next();
      ce != CollationElementIterator.NULLORDER;
      ce = elements.next()) {
    // The first element carrying a primary weight settles the answer.
    if (CollationElementIterator.primaryOrder(ce) != 0) {
      return false;
    }
  }
  return true;
}

/**
 * Builds a {@code CollationKey} for {@code source} using the raw key held by {@code buffer}.
 *
 * @param source the string to create a key for
 * @param buffer the collation buffer; its {@code rawCollationKey} field is passed to the raw-key
 *        computation and then overwritten with that computation's result
 * @return a new {@code CollationKey} constructed from {@code source} and the buffer's raw key
 */
private CollationKey getCollationKey(String source, CollationBuffer buffer) {
  // Store the returned raw key back into the buffer; the call may hand back a different
  // instance than the one passed in.
  buffer.rawCollationKey =
      getRawCollationKey(source, buffer.rawCollationKey, buffer);
  CollationKey result = new CollationKey(source, buffer.rawCollationKey);
  return result;
}

CollationElementIterator strIter = collator.getCollationElementIterator(str);
CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);

@SuppressWarnings("unused")
public int[] findText2(String str, String key, int startingAt) {
  CollationElementIterator strIter = collator.getCollationElementIterator(str);
  CollationElementIterator keyIter = collator.getCollationElementIterator(key);

Javadoc

RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule sets. RuleBasedCollator is designed to be fully compliant to the Unicode Collation Algorithm (UCA) and conforms to ISO 14651.

A Collator is thread-safe only when frozen. See isFrozen() and com.ibm.icu.util.Freezable.

Users are strongly encouraged to read the User Guide for more information about the collation service before using this class.

Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the argument locale. If a customized collation ordering or customized attributes are required, use the RuleBasedCollator(String) constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on the CLDR root collation, while re-adjusting the attributes and orders of the characters in the specified rule accordingly.

RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale is not available, the order eventually falls back to the CLDR root sort order.

For information about the collation rule syntax and details about customization, please refer to the Collation customization section of the User Guide.

Note that there are some differences between the Collation rule syntax used in Java and ICU4J:

According to the JDK documentation:
Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule is in force when a Thai vowel of the range \U0E40-\U0E44 precedes a Thai consonant of the range \U0E01-\U0E2E OR a Lao vowel of the range \U0EC0-\U0EC4 precedes a Lao consonant of the range \U0E81-\U0EAE then the vowel is placed after the consonant for collation purposes.
If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on.
ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored.
As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported.

Examples

Creating Customized RuleBasedCollators:

 
String simple = "& a < b < c < d"; 
RuleBasedCollator simpleCollator = new RuleBasedCollator(simple); 
String norwegian = "& a , A < b , B < c , C < d , D < e , E " 
+ "< f , F < g , G < h , H < i , I < j , " 
+ "J < k , K < l , L < m , M < n , N < " 
+ "o , O < p , P < q , Q <r , R <s , S < " 
+ "t , T < u , U < v , V < w , W < x , X " 
+ "< y , Y < z , Z < \u00E5 = a\u030A " 
+ ", \u00C5 = A\u030A ; aa , AA < \u00E6 " 
+ ", \u00C6 < \u00F8 , \u00D8"; 
RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);

Concatenating rules to combine Collators:

 
// Create an en_US Collator object 
RuleBasedCollator en_USCollator = (RuleBasedCollator) 
Collator.getInstance(new Locale("en", "US", "")); 
// Create a da_DK Collator object 
RuleBasedCollator da_DKCollator = (RuleBasedCollator) 
Collator.getInstance(new Locale("da", "DK", "")); 
// Combine the two 
// First, get the collation rules from en_USCollator 
String en_USRules = en_USCollator.getRules(); 
// Second, get the collation rules from da_DKCollator 
String da_DKRules = da_DKCollator.getRules(); 
RuleBasedCollator newCollator = 
new RuleBasedCollator(en_USRules + da_DKRules); 
// newCollator has the combined rules

Making changes to an existing RuleBasedCollator to create a new Collator object, by appending changes to the existing rule:

 
// Create a new Collator object with additional rules 
String addRules = "& C < ch, cH, Ch, CH"; 
RuleBasedCollator myCollator = 
new RuleBasedCollator(en_USCollator.getRules() + addRules); 
// myCollator contains the new rules

How to change the order of non-spacing accents:

 
// old rule with main accents 
String oldRules = "= \u0301 ; \u0300 ; \u0302 ; \u0308 " 
+ "; \u0327 ; \u0303 ; \u0304 ; \u0305 " 
+ "; \u0306 ; \u0307 ; \u0309 ; \u030A " 
+ "; \u030B ; \u030C ; \u030D ; \u030E " 
+ "; \u030F ; \u0310 ; \u0311 ; \u0312 " 
+ "< a , A ; ae, AE ; \u00e6 , \u00c6 " 
+ "< b , B < c, C < e, E & C < d , D"; 
// change the order of accent characters 
String addOn = "& \u0300 ; \u0308 ; \u0302"; 
RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);

Putting in a new primary ordering before the default setting, e.g. sort English characters before or after Japanese characters in the Japanese Collator:

 
// get en_US Collator rules 
RuleBasedCollator en_USCollator 
= (RuleBasedCollator)Collator.getInstance(Locale.US); 
// add a few Japanese characters to sort before English characters 
// suppose the last character before the first base letter 'a' in 
// the English collation rule is \u2212 
String jaString = "& \u2212 <\u3041, \u3042 <\u3043, " 
+ "\u3044"; 
RuleBasedCollator myJapaneseCollator 
= new RuleBasedCollator(en_USCollator.getRules() + jaString);

This class is not subclassable

Most used methods

setStrength
Sets this Collator's strength attribute. The strength attribute determines the minimum level of diff
setAlternateHandlingShifted
Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the U
setCaseLevel
When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY w
setDecomposition
Sets the decomposition mode of this Collator. Setting this decomposition attribute with CANONICAL_DE
setLowerCaseFirst
Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIAR
setNumericCollation
When numeric collation is turned on, this Collator makes substrings of digits sort according to the
setUpperCaseFirst
Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIA
<init>
Constructor that takes the argument rules for customization. The collator will be based on the CLDR
getRawCollationKey
Gets the simpler form of a CollationKey for the String source following the rules of this Collator a
getCollationElementIterator
Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity w
isLowerCaseFirst
Return true if a lowercase character is sorted before the corresponding uppercase character. See set
isUpperCaseFirst
Return true if an uppercase character is sorted before the corresponding lowercase character. See se

Popular in Java

Reading from database using SQL prepared statement
setContentView (Activity)
onRequestPermissionsResult (Fragment)
scheduleAtFixedRate (Timer)
FileNotFoundException (java.io)
Thrown when a file specified by a program cannot be found.
ConnectException (java.net)
A ConnectException is thrown if a connection cannot be established to a remote host on a specific po
Callable (java.util.concurrent)
A task that returns a result and may throw an exception. Implementors define a single method with no
Executor (java.util.concurrent)
An object that executes submitted Runnable tasks. This interface provides a way of decoupling task s
ExecutorService (java.util.concurrent)
An Executor that provides methods to manage termination and methods that can produce a Future for tr
JFrame (javax.swing)
Top plugins for WebStorm

How to use RuleBasedCollator in com.ibm.icu.text

Best Java code snippets using com.ibm.icu.text.RuleBasedCollator (Showing top 20 results out of 315)

How to use
RuleBasedCollator
in
com.ibm.icu.text