java.text.RuleBasedCollator java code examples

Refine search

Collator

/**
 * Creates a {@code Collator} suited to the supplied {@code locale}.
 *
 * @param locale the locale to build the collator for; must not be {@code null}
 * @return a new {@code RuleBasedCollator} wrapping a {@code RuleBasedCollatorICU}
 *         constructed for {@code locale}
 * @throws NullPointerException if {@code locale} is {@code null}
 */
public static Collator getInstance(Locale locale) {
  if (locale != null) {
    return new RuleBasedCollator(new RuleBasedCollatorICU(locale));
  }
  throw new NullPointerException("locale == null");
}

 /**
  * Compares this object's text with the text of another {@code StringComparable}.
  *
  * <p>The comparison is staged:
  * <ol>
  *   <li>texts that are equal code point for code point compare as {@code 0};</li>
  *   <li>otherwise the texts are compared at a strength no greater than
  *       {@code Collator.SECONDARY}, and any difference found there decides;</li>
  *   <li>otherwise the result of {@code getCaseDiff} decides if it is non-zero;</li>
  *   <li>otherwise the collator's result at its configured strength is returned.</li>
  * </ol>
  *
  * <p>The collator's strength is changed temporarily in stage 2 and is always
  * restored, even when the comparison throws.
  *
  * @param o the object to compare with; it is cast to {@code StringComparable}
  * @return a negative value, zero, or a positive value as described above
  */
 public int compareTo(Object o) {
  final String pattern = ((StringComparable)o).toString();
  if (m_text.equals(pattern)) { // code-point equal
   return 0;
  }
  final int savedStrength = m_collator.getStrength();
  int comp;
  // Is there a difference more significant than case order?
  if (savedStrength == Collator.PRIMARY || savedStrength == Collator.SECONDARY) {
   comp = m_collator.compare(m_text, pattern);
  } else { // stronger than SECONDARY: lower the strength for this one comparison
   m_collator.setStrength(Collator.SECONDARY);
   try {
    comp = m_collator.compare(m_text, pattern);
   } finally {
    // The collator is shared state; never leave it at the temporary strength.
    m_collator.setStrength(savedStrength);
   }
  }
  if (comp != 0) { // difference more significant than case order
   return comp;
  }

  // No difference more significant than case order: look for a case difference.
  comp = getCaseDiff(m_text, pattern);
  if (comp != 0) {
   return comp;
  }
  // No case difference either; a less significant difference could still exist.
  return m_collator.compare(m_text, pattern);
 }

private final int[] getFirstCaseDiff(final String text, final String pattern, final Locale locale){
   final CollationElementIterator targIter = m_collator.getCollationElementIterator(text);
   final CollationElementIterator patIter = m_collator.getCollationElementIterator(pattern);  
   int startTarg = -1;
   int endTarg = -1;
         final String  subTextUp = subText.toUpperCase(locale);
         final String  subPattUp = subPatt.toUpperCase(locale);
         if(m_collator.compare(subTextUp, subPattUp) != 0){ // not case diffference
           continue;
         if(m_collator.compare(subText, subTextUp) == 0){
           diff[0] = UPPER_CASE;
         }else if(m_collator.compare(subText, subText.toLowerCase(locale)) == 0){
           diff[0] = LOWER_CASE; 
         if(m_collator.compare(subPatt, subPattUp) == 0){
           diff[1] = UPPER_CASE;
         }else if(m_collator.compare(subPatt, subPatt.toLowerCase(locale)) == 0){
           diff[1] = LOWER_CASE;

/**
 * Orders two strings by the first case difference between them, honouring the
 * configured case order.
 *
 * <p>The collator is switched to {@code Collator.TERTIARY} strength and
 * {@code Collator.CANONICAL_DECOMPOSITION} while {@code getFirstCaseDiff} runs.
 * Both settings are restored afterwards, even if that call throws.
 *
 * @param text    the first string
 * @param pattern the second string
 * @return {@code 0} when {@code getFirstCaseDiff} reports no case difference
 *         ({@code null}); otherwise {@code -1} when the first entry of the
 *         reported difference matches the case that {@code m_caseOrder} puts
 *         first ({@code UPPER_CASE} for {@code "upper-first"}, {@code LOWER_CASE}
 *         for any other setting), and {@code 1} when it does not
 */
private final int getCaseDiff (final String text, final String pattern){
  final int savedStrength = m_collator.getStrength();
  final int savedDecomposition = m_collator.getDecomposition();
  int[] diff;
  try {
    m_collator.setStrength(Collator.TERTIARY); // do not ignore case
    m_collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    diff = getFirstCaseDiff(text, pattern, m_locale);
  } finally {
    // The collator is shared state; restore it on every exit path.
    m_collator.setStrength(savedStrength);
    m_collator.setDecomposition(savedDecomposition);
  }

  if (diff == null) { // no case differences
    return 0;
  }
  if ((m_caseOrder).equals("upper-first")) {
    return (diff[0] == UPPER_CASE) ? -1 : 1;
  }
  // lower-first
  return (diff[0] == LOWER_CASE) ? -1 : 1;
}

public int compare(Object a, Object b) {
  RuleBasedCollator collator_ini = (RuleBasedCollator)Collator.getInstance();
  try{
    RuleBasedCollator collator = new RuleBasedCollator(collator_ini.getRules().replaceAll("<'\u005f'", "<' '<'\u005f'"));
      String aFirstElement = (String) ((ArrayList) a).get(0);
      String bFirstElement = (String) ((ArrayList) b).get(0);
      if (collator.compare(aFirstElement, bFirstElement) < 0)
        return -1;
      else if (collator.compare(aFirstElement, bFirstElement) > 0)
        return 1;
      else {
        String aSecondElement = (String) ((ArrayList) a).get(1);
        String bSecondElement = (String) ((ArrayList) b).get(1);
        if (collator.compare(aSecondElement,bSecondElement) < 0)
          return -1;
        else if (collator.compare(aSecondElement,bSecondElement) > 0)
          return 1;
        else {
          String aThirdElement = (String) ((ArrayList) a).get(2);
          String bThirdElement = (String) ((ArrayList) b).get(2);
          if (collator.compare(aThirdElement,bThirdElement) < 0)
            return -1;
          else if (collator.compare(aThirdElement,bThirdElement) > 0)
            return 1;
     log.error("ERROR compare: ",e);
  return Collator.getInstance().compare(a, b);

 // Start from the rule set of the default-locale collator.
 RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance();

// Build "0 < 1 < ... < 99" and append it to the base rules after a reset ("&").
String numberChain = IntStream.range(0, 100)
    .mapToObj(String::valueOf)
    .collect(joining(" < "));
String combinedRules = baseCollator.getRules() + " & " + numberChain;
RuleBasedCollator extendedCollator = new RuleBasedCollator(combinedRules);

// Sample data, shuffled so that the sort below has real work to do.
List<String> samples = asList("1-2", "1-02", "1-20", "10-20", "fred", "jane", "pic01", "pic02", "pic02a", "pic 5", "pic05", "pic   7", "pic100", "pic100a", "pic120", "pic121");
shuffle(samples);

samples.sort(extendedCollator);
System.out.println(samples);

/**
 * Verify that the JVM has support for the Collator for the database's locale.
 *
 * <p>The database locale must be one of the locales reported by
 * {@code Collator.getAvailableLocales()}; otherwise the lookup is rejected.
 *
 * @param strength Collator strength or -1 for locale default.
 * @return Collator for database's locale
 * @throws StandardException if JVM does not have support for Collator
 */
private RuleBasedCollator verifyCollatorSupport(int strength)
throws StandardException {
  // Check that a Collator can be instantiated for the database locale.
  boolean supported = false;
  for (Locale candidate : Collator.getAvailableLocales()) {
    if (candidate.equals(databaseLocale)) {
      supported = true;
      break;
    }
  }

  if (!supported) {
    String localeName = (databaseLocale == null) ? "null" : databaseLocale.toString();
    throw StandardException.newException(
        SQLState.COLLATOR_NOT_FOUND_FOR_LOCALE,
        localeName);
  }

  RuleBasedCollator result = (RuleBasedCollator) Collator.getInstance(databaseLocale);
  if (strength != -1) {
    // -1 means "keep the locale default"; any other value is applied as given.
    result.setStrength(strength);
  }
  return result;
}
/**

((RuleBasedCollator)Collator.getInstance(Locale.FRANCE)).getRules();

  uca = (RuleBasedCollator) Collator.getInstance(loc);
if (normalization != null) {
  if (normalization.equals("yes")) {
    uca.setDecomposition(java.text.Collator.CANONICAL_DECOMPOSITION);
  } else if (normalization.equals("no")) {
    uca.setDecomposition(java.text.Collator.NO_DECOMPOSITION);

/**
 * Creates a comparator for the given criterion and direction that collates
 * strings according to {@code locale}.
 *
 * <p>The locale's default collation rules are patched so that a space is
 * ordered immediately before the underscore, and the resulting collator is
 * set to {@code Collator.TERTIARY} strength. If the collator cannot be built
 * for any reason (including a {@code null} locale), a warning is logged and
 * {@code localeCollator} is left {@code null}.
 *
 * @param criterion the sort criterion, passed to the two-argument constructor
 * @param asc       the sort direction, passed to the two-argument constructor
 * @param locale    the locale whose collation rules should be used
 */
public SiteComparator(String criterion, String asc, Locale locale) {
    this(criterion, asc);
    m_loc = locale;
    try {
        RuleBasedCollator defaultCollator = (RuleBasedCollator) Collator.getInstance(locale);
        String rules = defaultCollator.getRules();
        localeCollator = new RuleBasedCollator(rules.replaceAll("<'\u005f'", "<' '<'\u005f'"));
        localeCollator.setStrength(Collator.TERTIARY);
    } catch (Exception e) {
        // Concatenate the locale directly instead of calling toString() on it:
        // a null locale is one of the failures that reaches this handler, and
        // dereferencing it here would throw out of the constructor.
        log.warn("SiteComparator failed to create RuleBasedCollator for locale " + locale, e);
        localeCollator = null;
    }
}

 import java.text.*;
import java.util.*;

/**
 * Demonstrates four ways of ordering the same list of strings: natural
 * ({@code String.compareTo}) order, case-insensitive order, a locale-specific
 * collator, and a collator built from custom rules.
 */
public class Order {
 /**
  * Norwegian Bokmal as used in Norway. Language and country are passed as
  * separate arguments; the single-argument {@code Locale} constructor would
  * treat a combined tag such as "nb_NO" as one (unknown) language code.
  */
 static final Locale NORWEGIAN = new Locale("nb", "NO");

 /**
  * Sorts a fixed sample list four times and prints the result of each sort.
  *
  * @param args unused
  * @throws ParseException if the custom collation rules cannot be parsed
  */
 public static void main(String[] args) throws ParseException {
  List<String> list = Arrays.asList("a", "A", "\u00E6", "z", "Z", "1", "-");

  Collections.sort(list);
  print("Natural", list);

  Collections.sort(list, String.CASE_INSENSITIVE_ORDER);
  print("Case insensitive", list);

  Collator norwegian = Collator.getInstance(NORWEGIAN);
  // PRIMARY strength: only base-letter differences are significant.
  norwegian.setStrength(Collator.PRIMARY);
  Collections.sort(list, norwegian);
  print("Localized natural language rules (Norwegian)", list);

  Collator custom = new RuleBasedCollator("< a< A< z< Z< '-'< 1");
  Collections.sort(list, custom);
  print("Custom", list);
 }

 /**
  * Prints a heading followed by a value, each on its own line.
  *
  * @param what  the heading
  * @param value the value to print
  */
 private static void print(String what, Object value) {
  System.out.println(what);
  System.out.println(value);
 }
}

 return Collator.getInstance (SystemHelper.getSystemLocale ());
final Collator aCollator = Collator.getInstance (aLocale);
if (aCollator == null)
 return Collator.getInstance (SystemHelper.getSystemLocale ());
 final String sRules = ((RuleBasedCollator) aCollator).getRules ();
 if (!sRules.contains ("<'.'<"))
 final RuleBasedCollator aNewCollator = new RuleBasedCollator (sNewRules);
 aNewCollator.setStrength (Collator.TERTIARY);
 aNewCollator.setDecomposition (Collator.FULL_DECOMPOSITION);
 return aNewCollator;

    return new RuleBasedCollator(rules);
return Collator.getInstance(locale);

 /**
  * Compares this object's text with the text of another {@code StringComparable}.
  *
  * <p>The comparison is staged:
  * <ol>
  *   <li>texts that are equal code point for code point compare as {@code 0};</li>
  *   <li>otherwise the texts are compared at a strength no greater than
  *       {@code Collator.SECONDARY}, and any difference found there decides;</li>
  *   <li>otherwise the result of {@code getCaseDiff} decides if it is non-zero;</li>
  *   <li>otherwise the collator's result at its configured strength is returned.</li>
  * </ol>
  *
  * <p>The collator's strength is changed temporarily in stage 2 and is always
  * restored, even when the comparison throws.
  *
  * @param o the object to compare with; it is cast to {@code StringComparable}
  * @return a negative value, zero, or a positive value as described above
  */
 public int compareTo(Object o) {
  final String pattern = ((StringComparable)o).toString();
  if (m_text.equals(pattern)) { // code-point equal
   return 0;
  }
  final int savedStrength = m_collator.getStrength();
  int comp;
  // Is there a difference more significant than case order?
  if (savedStrength == Collator.PRIMARY || savedStrength == Collator.SECONDARY) {
   comp = m_collator.compare(m_text, pattern);
  } else { // stronger than SECONDARY: lower the strength for this one comparison
   m_collator.setStrength(Collator.SECONDARY);
   try {
    comp = m_collator.compare(m_text, pattern);
   } finally {
    // The collator is shared state; never leave it at the temporary strength.
    m_collator.setStrength(savedStrength);
   }
  }
  if (comp != 0) { // difference more significant than case order
   return comp;
  }

  // No difference more significant than case order: look for a case difference.
  comp = getCaseDiff(m_text, pattern);
  if (comp != 0) {
   return comp;
  }
  // No case difference either; a less significant difference could still exist.
  return m_collator.compare(m_text, pattern);
 }

  Collator cl = (Collator) coll.clone();
  cl.setStrength(Collator.PRIMARY);
  return cl;
case V2:
  String rules = coll.getRules().replaceAll(",'-'", ""); // don't ignore dash
  rules = rules.replaceAll("<'_'", "<' '<'-'<'_'"); // sort dash and space before underscore
  try {
    coll = new RuleBasedCollator(rules);
    coll.setStrength(Collator.PRIMARY);  // ignore case and accent differences
    return coll;
  } catch (ParseException e) {

/**
 * Test whether one string contains another, according to the rules
 * of the XPath contains() function.
 *
 * <p>Both strings are turned into collation element iterators by this
 * object's collator, and the decision is delegated to
 * {@code collationContains}.
 *
 * @param s1 the containing string
 * @param s2 the contained string
 * @return true iff s1 contains s2
 */
public boolean contains(String s1, String s2) {
  return collationContains(
      collator.getCollationElementIterator(s1),
      collator.getCollationElementIterator(s2),
      null,
      false);
}

    /**
     * Patches a collator's rules so that the space character is ordered
     * immediately before the underscore.
     *
     * @param source the collator to patch
     * @return a new {@code RuleBasedCollator} built from the patched rules, or
     *         {@code source} itself when it is not a {@code RuleBasedCollator}
     *         or the patched rules cannot be parsed
     */
    private Collator fixCollator(Collator source) {
      if (!(source instanceof RuleBasedCollator)) {
        // Nothing to patch: only rule-based collators expose their rules.
        return source;
      }
      String originalRules = ((RuleBasedCollator) source).getRules();
      String patchedRules = originalRules.replaceAll("<'\u005f'", "<' '<'\u005f'");
      try {
        return new RuleBasedCollator(patchedRules);
      } catch (ParseException e) {
        // Patched rules were rejected; fall back to the original silently.
        return source;
      }
    }

/**
 * Ask whether two strings are considered equal under this collation.
 *
 * <p>Both arguments are converted to {@code String} before comparison.
 * NOTE(review): this assumes {@code uca} is a {@code java.text.Collator}, whose
 * only overload accepting non-{@code String} arguments is
 * {@code compare(Object, Object)}, which casts both arguments to
 * {@code String} - confirm against the field declaration.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return true if the strings are considered equal
 */
@Override
public boolean comparesEqual(CharSequence s1, CharSequence s2) {
  return uca.compare(s1.toString(), s2.toString()) == 0;
}

public MovieComparator() {
 RuleBasedCollator defaultCollator = (RuleBasedCollator) RuleBasedCollator.getInstance();
 try {
  // default collator ignores whitespaces
  // using hack from http://stackoverflow.com/questions/16567287/java-collation-ignores-space
  stringCollator = new RuleBasedCollator(defaultCollator.getRules().replace("<'\u005f'", "<' '<'\u005f'"));
 }
 catch (Exception e) {
  stringCollator = defaultCollator;
 }
}

rbc.setDecomposition(Collator.FULL_DECOMPOSITION);
String rules = rbc.getRules();
if (rules.startsWith("@")) {
sourceCursor = rbc.getCollationElementIterator(source);
targetCursor = rbc.getCollationElementIterator(target);
  initialCheckSecTer = rbc.getStrength() >= Collator.SECONDARY;
  checkSecTer = initialCheckSecTer;
  checkTertiary = rbc.getStrength() >= Collator.TERTIARY;

Javadoc

A concrete implementation class for Collation.

RuleBasedCollator has the following restrictions for efficiency (other subclasses may be used for more complex languages):

If a French secondary ordering is specified it applies to the whole collator object.
All non-mentioned Unicode characters are at the end of the collation order.
If a character is not located in the RuleBasedCollator, the default Unicode Collation Algorithm (UCA) rule-based table is automatically searched as a backup.

The collation table is composed of a list of collation rules, where each rule takes one of three forms:

 
<modifier> 
<relation> <text-argument> 
<reset> <text-argument>

The rule elements are defined as follows:

Modifier: There is a single modifier which is used to specify that all accents (secondary differences) are backwards:
- '@' : Indicates that accents are sorted backwards, as in French.
Relation: The relations are the following:
- '<' : Greater, as a letter difference (primary)
- ';' : Greater, as an accent difference (secondary)
- ',' : Greater, as a case difference (tertiary)
- '=' : Equal
Text-Argument: A text-argument is any sequence of characters, excluding special characters (that is, common whitespace characters [0009-000D, 0020] and rule syntax characters [0021-002F, 003A-0040, 005B-0060, 007B-007E]). If those characters are desired, you can put them in single quotes (for example, use '&' for ampersand). Note that unquoted white space characters are ignored; for example, b c is treated as bc.
Reset: There is a single reset which is used primarily for contractions and expansions, but which can also be used to add a modification at the end of a set of rules:
- '&' : Indicates that the next rule follows the position to where the reset text-argument would be sorted.

This sounds more complicated than it is in practice. For example, the following are equivalent ways of expressing the same thing:

 
a < b < c 
a < b & b < c 
a < c & a < b

Notice that the order is important, as the subsequent item goes immediately after the text-argument. The following are not equivalent:

 
a < b & a < c 
a < c & a < b

Either the text-argument must already be present in the sequence, or some initial substring of the text-argument must be present. For example "a < b & ae < e" is valid since "a" is present in the sequence before "ae" is reset. In this latter case, "ae" is not entered and treated as a single character; instead, "e" is sorted as if it were expanded to two characters: "a" followed by an "e". This difference appears in natural languages: in traditional Spanish "ch" is treated as if it contracts to a single character (expressed as "c < ch < d"), while in traditional German a-umlaut is treated as if it expands to two characters (expressed as "a,A < b,B ... & ae;\u00e4 & AE;\u00c4", where \u00e4 and \u00c4 are the escape sequences for a-umlaut).

Ignorable Characters

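For ignorable characters, the first rule must start with a relation (the examples we have used above are really fragments; "a < b" really should be "< a < b"). If, however, the first relation is not "<", then all the text-arguments up to the first "<" are ignorable. For example, ", - < a < b" makes "-" an ignorable character.

Normalization and Accents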

RuleBasedCollator automatically processes its rule table to include both pre-composed and combining-character versions of accented characters. Even if the provided rule string contains only base characters and separate combining accent characters, the pre-composed accented characters matching all canonical combinations of characters from the rule string will be entered in the table.

This allows you to use a RuleBasedCollator to compare accented strings even when the collator is set to NO_DECOMPOSITION. However, if the strings to be collated contain combining sequences that may not be in canonical order, you should set the collator to CANONICAL_DECOMPOSITION to enable sorting of combining sequences. For more information, see The Unicode Standard, Version 3.0.

Errors

The following rules are not valid:

A text-argument contains unquoted punctuation symbols, for example "a < b-c < d".
A relation or reset character is not followed by a text-argument, for example "a < , b".
A reset where the text-argument (or an initial substring of the text-argument) is not already in the sequence or allocated in the default UCA table, for example "a < b & e < f".

If you produce one of these errors, RuleBasedCollator throws a ParseException.

Examples

Normally, to create a rule-based collator object, you will use Collator's factory method getInstance. However, to create a rule-based collator object with specialized rules tailored to your needs, you construct the RuleBasedCollator with the rules contained in a String object. For example:

 
String Simple = "< a < b < c < d"; 
RuleBasedCollator mySimple = new RuleBasedCollator(Simple);

Or:

 
String Norwegian = "< a,A< b,B< c,C< d,D< e,E< f,F< g,G< h,H< i,I" 
+ "< j,J< k,K< l,L< m,M< n,N< o,O< p,P< q,Q< r,R" 
+ "< s,S< t,T< u,U< v,V< w,W< x,X< y,Y< z,Z" 
+ "< \u00E5=a\u030A,\u00C5=A\u030A" 
+ ";aa,AA< \u00E6,\u00C6< \u00F8,\u00D8"; 
RuleBasedCollator myNorwegian = new RuleBasedCollator(Norwegian);

Combining Collators is as simple as concatenating strings. Here is an example that combines two Collators from two different locales:

 
// Create an en_US Collator object 
RuleBasedCollator en_USCollator = (RuleBasedCollator)Collator 
.getInstance(new Locale("en", "US", "")); 
// Create a da_DK Collator object 
RuleBasedCollator da_DKCollator = (RuleBasedCollator)Collator 
.getInstance(new Locale("da", "DK", "")); 
// Combine the two collators 
// First, get the collation rules from en_USCollator 
String en_USRules = en_USCollator.getRules(); 
// Second, get the collation rules from da_DKCollator 
String da_DKRules = da_DKCollator.getRules(); 
RuleBasedCollator newCollator = new RuleBasedCollator(en_USRules + da_DKRules); 
// newCollator has the combined rules

The next example shows how to make changes on an existing table to create a new Collator object. For example, add "& C < ch, cH, Ch, CH" to the en_USCollator object to create your own:

 
// Create a new Collator object with additional rules 
String addRules = "& C < ch, cH, Ch, CH"; 
RuleBasedCollator myCollator = new RuleBasedCollator(en_USCollator.getRules() + addRules); 
// myCollator contains the new rules

The following example demonstrates how to change the order of non-spacing accents:

 
// old rule 
String oldRules = "= \u00a8 ; \u00af ; \u00bf" + "< a , A ; ae, AE ; \u00e6 , \u00c6" 
+ "< b , B < c, C < e, E & C < d, D"; 
// change the order of accent characters 
String addOn = "& \u00bf ; \u00af ; \u00a8;"; 
RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);

The last example shows how to put new primary ordering in before the default setting. For example, in the Japanese Collator, you can either sort English characters before or after Japanese characters:

 
// get en_US Collator rules 
RuleBasedCollator en_USCollator = (RuleBasedCollator) 
Collator.getInstance(Locale.US); 
// add a few Japanese character to sort before English characters 
// suppose the last character before the first base letter 'a' in 
// the English collation rule is \u30A2 
String jaString = "& \u30A2 , \u30FC < \u30C8"; 
RuleBasedCollator myJapaneseCollator = 
new RuleBasedCollator(en_USCollator.getRules() + jaString);

Most used methods

<init>
compare
Compares the character data stored in two different strings based on the collation rules. Returns in
getCollationElementIterator
Return a CollationElementIterator for the given String.
getRules
Gets the table-based rules for the collation object.
getCollationKey
Transforms the string into a series of characters that can be compared with CollationKey.compareTo.
setStrength
setDecomposition
getStrength
getDecomposition
getInstance
hashCode
Generates the hash code for the table-based collation object
clone
Standard override; no change in semantics.

Popular in Java

Reading from database using SQL prepared statement
getSupportFragmentManager (FragmentActivity)
getExternalFilesDir (Context)
getSystemService (Context)
FileInputStream (java.io)
An input stream that reads bytes from a file. File file = ...finally if (in != null) in.clos
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
Comparator (java.util)
A Comparator is used to compare two objects to determine their ordering with respect to each other.
HashSet (java.util)
HashSet is an implementation of a Set. All optional operations (adding and removing) are supported.
XPath (javax.xml.xpath)
XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
Top plugins for Android Studio

How to use RuleBasedCollator in java.text

Best Java code snippets using java.text.RuleBasedCollator (Showing top 20 results out of 342)

Refine search

How to use
RuleBasedCollator
in
java.text