com.ibm.icu.text.UTF16.getCharCount java code examples

/**
 * Computes a pending count from the fields {@code preFromULength},
 * {@code preFromUFirstCP} and {@code fromUChar32}.
 *
 * <ul>
 *   <li>{@code preFromULength > 0}: the UTF-16 length of {@code preFromUFirstCP}
 *       plus {@code preFromULength};</li>
 *   <li>{@code preFromULength < 0}: the negated {@code preFromULength};</li>
 *   <li>otherwise 1 if {@code fromUChar32 > 0};</li>
 *   <li>otherwise the UTF-16 length of {@code preFromUFirstCP} if it is positive;</li>
 *   <li>otherwise 0.</li>
 * </ul>
 *
 * @return the pending count as described above
 */
/*public*/int fromUCountPending() {
  if (preFromULength > 0) {
    return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
  }
  if (preFromULength < 0) {
    return -preFromULength;
  }
  if (fromUChar32 > 0) {
    return 1;
  }
  return (preFromUFirstCP > 0) ? UTF16.getCharCount(preFromUFirstCP) : 0;
}

/**
 * Returns the index one code point before {@code pos} in {@code str}.
 * When {@code pos} is zero or negative there is nothing to inspect, so the
 * result is simply {@code pos - 1}.
 *
 * @param str the text being traversed
 * @param pos the current UTF-16 index
 * @return the index preceding {@code pos} by one code point
 */
static final int posBefore(Replaceable str, int pos) {
  if (pos <= 0) {
    return pos - 1;
  }
  int previous = str.char32At(pos - 1);
  return pos - UTF16.getCharCount(previous);
}

/**
 * Advances to the next code point to be case-mapped and updates
 * {@code cpStart}/{@code cpLimit} so that they bracket it.
 *
 * @return the next code point to be case-mapped, or a negative value (-1)
 *         once {@code cpLimit} has reached {@code limit}
 */
public int nextCaseMapCP() {
  if (cpLimit >= limit) {
    return -1;
  }
  cpStart = cpLimit;
  final int codePoint = rep.char32At(cpLimit);
  cpLimit += UTF16.getCharCount(codePoint);
  return codePoint;
}

/**
 * Returns the next context code point in the direction given by {@code dir}.
 * Moving forward ({@code dir > 0}) reads at {@code index} and steps past the
 * code point; moving backward ({@code dir < 0}) reads the code point ending at
 * {@code index} and steps before it.
 *
 * @return the code point read, or -1 when no code point is available in the
 *         current direction
 */
public int next() {
  if (dir > 0) {
    if (index >= contextLimit) {
      // forward context iteration reached the limit
      reachedLimit = true;
      return -1;
    }
    final int forward = rep.char32At(index);
    index += UTF16.getCharCount(forward);
    return forward;
  }
  if (dir < 0 && index > contextStart) {
    final int backward = rep.char32At(index - 1);
    index -= UTF16.getCharCount(backward);
    return backward;
  }
  return -1;
}

/**
 * Advances past any run (possibly empty) of Pattern_White_Space characters
 * that begins at {@code pos} in {@code text}.
 *
 * @param text the text to scan
 * @param pos  the UTF-16 index at which scanning starts
 * @return the index of the first code point that is not Pattern_White_Space,
 *         or an index at or beyond the text length if none remains
 */
private static int skipPatternWhiteSpace(String text, int pos) {
  int cursor = pos;
  while (cursor < text.length()) {
    final int cp = UTF16.charAt(text, cursor);
    if (PatternProps.isWhiteSpace(cp)) {
      cursor += UTF16.getCharCount(cp);
    } else {
      return cursor;
    }
  }
  return cursor;
}

/**
 * Advances past any run (possibly empty) of characters for which
 * {@code UCharacter.isUWhiteSpace()} is true, starting at {@code pos}.
 *
 * @param text the text to scan
 * @param pos  the UTF-16 index at which scanning starts
 * @return the index of the first code point that fails the white space test,
 *         or an index at or beyond the text length if none remains
 */
private static int skipUWhiteSpace(String text, int pos) {
  for (int cp; pos < text.length(); pos += UTF16.getCharCount(cp)) {
    cp = UTF16.charAt(text, pos);
    if (!UCharacter.isUWhiteSpace(cp)) {
      return pos;
    }
  }
  return pos;
}

 /**
 * Advances past any run (possibly empty) of bidi marks that begins at
 * {@code pos} in {@code text}.
 *
 * @param text the text to scan
 * @param pos  the UTF-16 index at which scanning starts
 * @return the index of the first code point that is not a bidi mark,
 *         or an index at or beyond the text length if none remains
 */
private static int skipBidiMarks(String text, int pos) {
  int cursor = pos;
  while (cursor < text.length()) {
    final int cp = UTF16.charAt(text, cursor);
    if (isBidiMark(cp)) {
      cursor += UTF16.getCharCount(cp);
      continue;
    }
    break;
  }
  return cursor;
}

  /**
   * Checks whether this half is acceptable as input: every code point of
   * {@code text} must be accepted by {@code parser.parseData.isMatcher()},
   * i.e. the half must not contain functions or other output-only elements.
   *
   * @param parser the parser whose parse data classifies each code point
   * @return true if every code point passes the matcher check (also true
   *         for empty text), false at the first code point that does not
   */
  public boolean isValidInput(TransliteratorParser parser) {
    int offset = 0;
    while (offset < text.length()) {
      final int cp = UTF16.charAt(text, offset);
      if (!parser.parseData.isMatcher(cp)) {
        return false;
      }
      offset += UTF16.getCharCount(cp);
    }
    return true;
  }
}

/**
 * Returns the code point at the current position and advances past it.
 * The position of the returned code point is remembered in
 * {@code lastCharIndex}.
 *
 * @return the next code point
 * @throws IllegalStateException if {@code hasNext()} is false
 *
 * @since 1.2.0
 */
public int next() {
  if (!hasNext()) {
    throw new IllegalStateException();
  }

  final int codePoint = utfCharAt(pos);
  lastCharIndex = pos;
  pos = pos + UTF16.getCharCount(codePoint);
  return codePoint;
}

/**
 * Checks whether this half is acceptable as output: every code point of
 * {@code text} must be accepted by {@code parser.parseData.isReplacer()},
 * i.e. the half must not contain quantifiers or other input-only elements.
 *
 * @param parser the parser whose parse data classifies each code point
 * @return true if every code point passes the replacer check (also true
 *         for empty text), false at the first code point that does not
 */
public boolean isValidOutput(TransliteratorParser parser) {
  int cp;
  for (int offset = 0; offset < text.length(); offset += UTF16.getCharCount(cp)) {
    cp = UTF16.charAt(text, offset);
    if (!parser.parseData.isReplacer(cp)) {
      return false;
    }
  }
  return true;
}

/**
 * Returns the index one code point after {@code pos} in {@code str}.
 * When {@code pos} lies outside {@code [0, str.length())} there is no code
 * point to measure, so the result is simply {@code pos + 1}.
 *
 * @param str the text being traversed
 * @param pos the current UTF-16 index
 * @return the index following {@code pos} by one code point
 */
static final int posAfter(Replaceable str, int pos) {
  if (pos < 0 || pos >= str.length()) {
    return pos + 1;
  }
  int current = str.char32At(pos);
  return pos + UTF16.getCharCount(current);
}

/**
 * Finds the first index at or after {@code fromIndex} at which this set matches
 * the code point located there. With {@code findNot} set, the sense is reversed:
 * the search stops at the first code point the set does <em>not</em> contain.
 * If the search runs off the end, the scan position (at or past the length of
 * {@code value}) is returned.
 *
 * @param value     the text to search
 * @param fromIndex the UTF-16 index at which to start
 * @param findNot   true to look for the first non-matching code point
 * @return the index where the search stopped
 * @internal
 * @deprecated This API is ICU internal only. Use span instead.
 */
@Deprecated
public int findIn(CharSequence value, int fromIndex, boolean findNot) {
  //TODO add strings, optimize, using ICU4C algorithms
  int index = fromIndex;
  while (index < value.length()) {
    final int cp = UTF16.charAt(value, index);
    if (contains(cp) != findNot) {
      return index;
    }
    index += UTF16.getCharCount(cp);
  }
  return index;
}

/**
 * Parses a Unicode identifier from {@code text}, starting at the index held by
 * {@code pos} and never reading at or beyond {@code limit}. The first code point
 * must be an identifier start and every code point must be an identifier part.
 *
 * @param text  the text to parse
 * @param pos   on entry, where parsing begins; on success, advanced to just
 *              past the identifier; left untouched on failure
 * @param limit the exclusive upper bound for parsing
 * @return the identifier, or the empty string if none was found
 */
public String parseReference(String text, ParsePosition pos, int limit) {
  final int start = pos.getIndex();
  int end = start;
  while (end < limit) {
    final int cp = UTF16.charAt(text, end);
    final boolean accepted =
        (end != start || UCharacter.isUnicodeIdentifierStart(cp))
            && UCharacter.isUnicodeIdentifierPart(cp);
    if (!accepted) {
      break;
    }
    end += UTF16.getCharCount(cp);
  }
  if (end == start) {
    // No valid name chars: the empty string signals failure.
    return "";
  }
  pos.setIndex(end);
  return text.substring(start, end);
}

/**
 * Adds every code point of the given string to this set. Thus "ch" =&gt; {"c", "h"}.
 * If this set already contains a particular character, adding it again has no effect.
 * @param s the source string
 * @return this object, for chaining
 * @stable ICU 2.0
 */
public final UnicodeSet addAll(CharSequence s) {
  checkFrozen();
  int offset = 0;
  while (offset < s.length()) {
    final int codePoint = UTF16.charAt(s, offset);
    add_unchecked(codePoint, codePoint);
    offset += UTF16.getCharCount(codePoint);
  }
  return this;
}

/**
 * Recursive helper for containsAll, used when the simple match fails and the
 * set holds strings. Tries to cover {@code s} from index {@code i} to its end,
 * first by consuming a single contained code point, then by consuming any
 * string of this set that occurs at {@code i}.
 * @param s source string
 * @param i index from which the remainder of {@code s} must be matched
 * @return true if the remainder of {@code s} can be fully matched
 */
private boolean containsAll(String s, int i) {
  if (i >= s.length()) {
    return true;
  }
  final int cp = UTF16.charAt(s, i);
  if (contains(cp)) {
    if (containsAll(s, i + UTF16.getCharCount(cp))) {
      return true;
    }
  }
  for (String candidate : strings) {
    if (!s.startsWith(candidate, i)) {
      continue;
    }
    if (containsAll(s, i + candidate.length())) {
      return true;
    }
  }
  return false;
}

/**
 * Moves past any characters that should be ignored under the given options.
 * Handy together with the lookahead() method.
 *
 * At present only SKIP_WHITESPACE has any effect: while the current code point
 * is Pattern_White_Space, the iterator advances over it.
 * @param options one or more of the following options, bitwise-OR-ed
 * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
 */
public void skipIgnored(int options) {
  if ((options & SKIP_WHITESPACE) == 0) {
    return;
  }
  int current = _current();
  while (PatternProps.isWhiteSpace(current)) {
    _advance(UTF16.getCharCount(current));
    current = _current();
  }
}

/**
 * Implementation of UnicodeMatcher API. Unions into the given set every
 * character this object may match: for each code point of {@code pattern},
 * either the code point itself, or, when {@code data} maps it to a matcher,
 * that matcher's own match set.
 * @param toUnionTo the set into which to union the source characters
 */
@Override
public void addMatchSetTo(UnicodeSet toUnionTo) {
  int offset = 0;
  while (offset < pattern.length()) {
    final int cp = UTF16.charAt(pattern, offset);
    final UnicodeMatcher nested = data.lookupMatcher(cp);
    if (nested != null) {
      nested.addMatchSetTo(toUnionTo);
    } else {
      toUnionTo.add(cp);
    }
    offset += UTF16.getCharCount(cp);
  }
}

/**
 * Classifies the span between two break positions by scanning its code points:
 * the first digit found yields WORD_NUMBER, the first letter yields WORD_LETTER,
 * and a span with neither yields WORD_NONE.
 *
 * @param current the break position where the span begins (relative to {@code start}),
 *                or {@code BreakIterator.DONE}
 * @param next    the break position where the span ends (relative to {@code start}),
 *                or {@code BreakIterator.DONE}
 * @return one of the {@code RuleBasedBreakIterator.WORD_*} status values
 */
private int calcStatus(int current, int next) {
  if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
    return RuleBasedBreakIterator.WORD_NONE;
  }
  int begin = start + current;
  int end = start + next;
  int codepoint;
  for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
    // Read the code point at the loop index. The previous code always read at
    // 'begin', so only the first code point of the span was ever examined.
    codepoint = UTF16.charAt(text, 0, end, i);
    if (UCharacter.isDigit(codepoint)) {
      return RuleBasedBreakIterator.WORD_NUMBER;
    } else if (UCharacter.isLetter(codepoint)) {
      // TODO: try to separately specify ideographic, kana?
      // [currently all bundled as letter for this case]
      return RuleBasedBreakIterator.WORD_LETTER;
    }
  }
  return RuleBasedBreakIterator.WORD_NONE;
}

  /**
   * Unions into the given set every character this object may output: for
   * each code point of {@code output}, either the code point itself, or, when
   * {@code data} maps it to a replacer, that replacer's own replacement set.
   * @param toUnionTo the set into which to union the output characters
   */
  @Override
  public void addReplacementSetTo(UnicodeSet toUnionTo) {
    int offset = 0;
    while (offset < output.length()) {
      final int cp = UTF16.charAt(output, offset);
      final UnicodeReplacer nested = data.lookupReplacer(cp);
      if (nested != null) {
        nested.addReplacementSetTo(toUnionTo);
      } else {
        toUnionTo.add(cp);
      }
      offset += UTF16.getCharCount(cp);
    }
  }
}

/**
 * Matches {@code str} against {@code text} beginning at {@code text[pos]}.
 * Bidi marks in {@code str} are skipped, each remaining code point is matched
 * through the single-code-point {@code match} overload, and after a
 * Pattern_White_Space code point the rest of that white space run in
 * {@code str} is skipped as well.
 *
 * @param text the text to match against
 * @param pos  the index in {@code text} at which matching starts
 * @param str  the string to match
 * @return the index of the next character in {@code text} on success;
 *         a negative position from the per-code-point match ends the loop
 *         and is returned as the failure indicator
 */
static final int match(String text, int pos, String str) {
  int strIndex = 0;
  while (strIndex < str.length() && pos >= 0) {
    final int ch = UTF16.charAt(str, strIndex);
    strIndex += UTF16.getCharCount(ch);
    if (!isBidiMark(ch)) {
      pos = match(text, pos, ch);
      if (PatternProps.isWhiteSpace(ch)) {
        strIndex = skipPatternWhiteSpace(str, strIndex);
      }
    }
  }
  return pos;
}

Javadoc

Determines how many chars this char32 requires. If a validity check is required, use isLegal() on char32 before calling.

Popular methods of UTF16

charAt
Extract a single UTF-32 value from a substring. Used when iterating forwards or backwards (with UTF1
isSurrogate
Determines whether the code value is a surrogate.
isTrailSurrogate
Determines whether the character is a trail surrogate.
moveCodePointOffset
Shifts offset16 by the argument number of codepoints within a subarray.
setCharAt
Set a code point into a UTF16 position in a char array. Adjusts target according if we are replacing
append
Adds a codepoint to offset16 position of the argument char array.
countCodePoint
Number of codepoints in a UTF16 char array substring
getLeadSurrogate
Returns the lead surrogate. If a validity check is required, use isLegal() [../lang/UCharacter.html#i
getTrailSurrogate
Returns the trail surrogate. If a validity check is required, use isLegal() [../lang/UCharacter.html#
isLeadSurrogate
Determines whether the character is a lead surrogate.
_charAt
bounds
Returns the type of the boundaries around the char at offset16. Used for random access. Note that th

Popular in Java

Reactive rest calls using spring rest template
getSupportFragmentManager (FragmentActivity)
getResourceAsStream (ClassLoader)
scheduleAtFixedRate (Timer)
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
StringTokenizer (java.util)
Breaks a string into tokens; new code should probably use String#split.> // Legacy code: StringTo
AtomicInteger (java.util.concurrent.atomic)
An int value that may be updated atomically. See the java.util.concurrent.atomic package specificati
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning.> This module, both source code and documentation, is in t
GridBagLayout (java.awt)
The GridBagLayout class is a flexible layout manager that aligns components vertically and horizonta
Window (java.awt)
A Window object is a top-level window with no borders and no menubar. The default layout for a windo
Top plugins for Android Studio

How to use the getCharCount method in com.ibm.icu.text.UTF16

Best Java code snippets using com.ibm.icu.text.UTF16.getCharCount (Showing top 20 results out of 315)

How to use the getCharCount method in com.ibm.icu.text.UTF16