/**
 * Computes a pending-unit count from the {@code preFromULength},
 * {@code preFromUFirstCP} and {@code fromUChar32} state fields.
 *
 * <ul>
 *   <li>{@code preFromULength > 0}: the UTF-16 length of {@code preFromUFirstCP}
 *       plus {@code preFromULength};</li>
 *   <li>{@code preFromULength < 0}: the negated {@code preFromULength};</li>
 *   <li>otherwise {@code 1} when {@code fromUChar32 > 0};</li>
 *   <li>otherwise the UTF-16 length of {@code preFromUFirstCP} when it is positive;</li>
 *   <li>otherwise {@code 0}.</li>
 * </ul>
 *
 * @return the count selected by the first rule above that applies
 */
/*public*/int fromUCountPending() {
    if (preFromULength > 0) {
        return UTF16.getCharCount(preFromUFirstCP) + preFromULength;
    }
    if (preFromULength < 0) {
        return -preFromULength;
    }
    if (fromUChar32 > 0) {
        return 1;
    }
    return (preFromUFirstCP > 0) ? UTF16.getCharCount(preFromUFirstCP) : 0;
}
/**
 * Steps one code point backwards from {@code pos} in {@code str}.
 *
 * @param str the text being traversed
 * @param pos the current UTF-16 offset
 * @return {@code pos} minus the UTF-16 length of the code point read at
 *         {@code pos - 1} when {@code pos > 0}; otherwise {@code pos - 1}
 */
static final int posBefore(Replaceable str, int pos) {
    if (pos <= 0) {
        return pos - 1;
    }
    int previous = str.char32At(pos - 1);
    return pos - UTF16.getCharCount(previous);
}
/**
 * Fetches the next code point to be case-mapped while iterating forward,
 * updating {@code cpStart} and {@code cpLimit} so that they bracket it.
 *
 * @return The next code point to be case-mapped, or a negative value
 *         once {@code cpLimit} has reached {@code limit}.
 */
public int nextCaseMapCP() {
    if (cpLimit >= limit) {
        return -1;
    }
    cpStart = cpLimit;
    final int codePoint = rep.char32At(cpLimit);
    cpLimit += UTF16.getCharCount(codePoint);
    return codePoint;
}
public int next() { int c; if(dir>0) { if(index<contextLimit) { c=rep.char32At(index); index+=UTF16.getCharCount(c); return c; } else { // forward context iteration reached the limit reachedLimit=true; } } else if(dir<0 && index>contextStart) { c=rep.char32At(index-1); index-=UTF16.getCharCount(c); return c; } return -1; }
/**
 * Advances past any Pattern_White_Space code points that begin at pos in text.
 *
 * @param text the text to scan
 * @param pos the UTF-16 offset at which to start
 * @return the offset of the first code point that is not Pattern_White_Space,
 *         or text.length() if the run extends to the end
 */
private static int skipPatternWhiteSpace(String text, int pos) {
    final int length = text.length();
    int cursor = pos;
    while (cursor < length) {
        final int cp = UTF16.charAt(text, cursor);
        if (PatternProps.isWhiteSpace(cp)) {
            cursor += UTF16.getCharCount(cp);
        } else {
            return cursor;
        }
    }
    return cursor;
}
/**
 * Advances past any code points for which UCharacter.isUWhiteSpace() holds,
 * beginning at pos in text.
 *
 * @param text the text to scan
 * @param pos the UTF-16 offset at which to start
 * @return the offset of the first code point that fails isUWhiteSpace(),
 *         or text.length() if the run extends to the end
 */
private static int skipUWhiteSpace(String text, int pos) {
    int cursor = pos;
    for (int cp; cursor < text.length(); cursor += UTF16.getCharCount(cp)) {
        cp = UTF16.charAt(text, cursor);
        if (!UCharacter.isUWhiteSpace(cp)) {
            return cursor;
        }
    }
    return cursor;
}
/**
 * Advances past any bidi marks (as decided by isBidiMark) that begin at pos in text.
 *
 * @param text the text to scan
 * @param pos the UTF-16 offset at which to start
 * @return the offset of the first code point that is not a bidi mark,
 *         or text.length() if the run extends to the end
 */
private static int skipBidiMarks(String text, int pos) {
    final int end = text.length();
    int offset = pos;
    boolean inRun = true;
    while (inRun && offset < end) {
        final int cp = UTF16.charAt(text, offset);
        inRun = isBidiMark(cp);
        if (inRun) {
            offset += UTF16.getCharCount(cp);
        }
    }
    return offset;
}
/**
 * Reports whether this half looks like valid input: every code point of
 * {@code text} must be accepted by {@code parser.parseData.isMatcher}, i.e.
 * the text holds no functions or other special output-only elements.
 *
 * @param parser the parser whose parse data classifies each code point
 * @return true if every code point passes {@code isMatcher}; false at the first one that does not
 */
public boolean isValidInput(TransliteratorParser parser) {
    int offset = 0;
    while (offset < text.length()) {
        final int cp = UTF16.charAt(text, offset);
        if (!parser.parseData.isMatcher(cp)) {
            return false;
        }
        offset += UTF16.getCharCount(cp);
    }
    return true;
}
} // presumably closes the enclosing class, whose header is outside this view
/**
 * Returns the code point at the current position and steps past it.
 * The position it was read from is recorded in {@code lastCharIndex}.
 *
 * @return next char.
 * @throws IllegalStateException if {@code hasNext()} is false
 *
 * @since 1.2.0
 */
public int next() {
    if (!hasNext()) {
        throw new IllegalStateException();
    }
    final int codePoint = utfCharAt(pos);
    lastCharIndex = pos;
    pos += UTF16.getCharCount(codePoint);
    return codePoint;
}
/**
 * Reports whether this half looks like valid output: every code point of
 * {@code text} must be accepted by {@code parser.parseData.isReplacer}, i.e.
 * the text holds no quantifiers or other special input-only elements.
 *
 * @param parser the parser whose parse data classifies each code point
 * @return true if every code point passes {@code isReplacer}; false at the first one that does not
 */
public boolean isValidOutput(TransliteratorParser parser) {
    int offset = 0;
    while (offset < text.length()) {
        final int cp = UTF16.charAt(text, offset);
        if (!parser.parseData.isReplacer(cp)) {
            return false;
        }
        offset += UTF16.getCharCount(cp);
    }
    return true;
}
/**
 * Steps one code point forwards from {@code pos} in {@code str}.
 *
 * @param str the text being traversed
 * @param pos the current UTF-16 offset
 * @return {@code pos} plus the UTF-16 length of the code point at {@code pos}
 *         when {@code 0 <= pos < str.length()}; otherwise {@code pos + 1}
 */
static final int posAfter(Replaceable str, int pos) {
    if (pos < 0 || pos >= str.length()) {
        return pos + 1;
    }
    final int here = str.char32At(pos);
    return pos + UTF16.getCharCount(here);
}
/** * Find the first index at or after fromIndex where the UnicodeSet matches at that index. * If findNot is true, then reverse the sense of the match: find the first place where the UnicodeSet doesn't match. * If there is no match, length is returned. * @internal * @deprecated This API is ICU internal only. Use span instead. */ @Deprecated public int findIn(CharSequence value, int fromIndex, boolean findNot) { //TODO add strings, optimize, using ICU4C algorithms int cp; for (; fromIndex < value.length(); fromIndex += UTF16.getCharCount(cp)) { cp = UTF16.charAt(value, fromIndex); if (contains(cp) != findNot) { break; } } return fromIndex; }
public String parseReference(String text, ParsePosition pos, int limit) { int start = pos.getIndex(); int i = start; String result = ""; while (i < limit) { int c = UTF16.charAt(text, i); if ((i == start && !UCharacter.isUnicodeIdentifierStart(c)) || !UCharacter.isUnicodeIdentifierPart(c)) { break; } i += UTF16.getCharCount(c); } if (i == start) { // No valid name chars return result; // Indicate failure with empty string } pos.setIndex(i); result = text.substring(start, i); return result; }
/**
 * Adds every code point of the given string to the set, one at a time.
 * Thus "ch" => {"c", "h"}.
 * If this set already contains a particular character, adding it has no effect
 * on that character.
 * @param s the source string
 * @return this object, for chaining
 * @stable ICU 2.0
 */
public final UnicodeSet addAll(CharSequence s) {
    checkFrozen();
    int offset = 0;
    while (offset < s.length()) {
        final int cp = UTF16.charAt(s, offset);
        add_unchecked(cp, cp);
        offset += UTF16.getCharCount(cp);
    }
    return this;
}
/**
 * Recursive helper for containsAll, used when the simple match fails and the set has strings.
 * Checks whether s, from index i to its end, can be fully covered by contained
 * code points and/or members of {@code strings}.
 * @param s source string
 * @param i index from which the remainder of s must match
 * @return true if the remainder can be matched to the end
 */
private boolean containsAll(String s, int i) {
    if (i >= s.length()) {
        return true;
    }
    final int cp = UTF16.charAt(s, i);
    if (contains(cp)) {
        // Consume one code point and try to match the rest.
        if (containsAll(s, i + UTF16.getCharCount(cp))) {
            return true;
        }
    }
    for (String candidate : strings) {
        if (!s.startsWith(candidate, i)) {
            continue;
        }
        // Consume a whole set string and try to match the rest.
        if (containsAll(s, i + candidate.length())) {
            return true;
        }
    }
    return false;
}
/**
 * Moves past any characters that the given options say to ignore.
 * Handy together with the lookahead() method.
 *
 * Only SKIP_WHITESPACE has an effect at present: while the current
 * code point is Pattern_White_Space, the iterator is advanced past it.
 * @param options one or more of the following options, bitwise-OR-ed
 * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
 */
public void skipIgnored(int options) {
    if ((options & SKIP_WHITESPACE) == 0) {
        return;
    }
    int current = _current();
    while (PatternProps.isWhiteSpace(current)) {
        _advance(UTF16.getCharCount(current));
        current = _current();
    }
}
/**
 * Implementation of UnicodeMatcher API. Unions into the given set every
 * character this object may match: for each code point of {@code pattern},
 * either the code point itself or, when {@code data.lookupMatcher} returns a
 * matcher for it, that matcher's own match set.
 * @param toUnionTo the set into which to union the source characters
 */
@Override
public void addMatchSetTo(UnicodeSet toUnionTo) {
    int offset = 0;
    while (offset < pattern.length()) {
        final int cp = UTF16.charAt(pattern, offset);
        final UnicodeMatcher matcher = data.lookupMatcher(cp);
        if (matcher != null) {
            matcher.addMatchSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
        offset += UTF16.getCharCount(cp);
    }
}
private int calcStatus(int current, int next) { if (current == BreakIterator.DONE || next == BreakIterator.DONE) return RuleBasedBreakIterator.WORD_NONE; int begin = start + current; int end = start + next; int codepoint; for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) { codepoint = UTF16.charAt(text, 0, end, begin); if (UCharacter.isDigit(codepoint)) return RuleBasedBreakIterator.WORD_NUMBER; else if (UCharacter.isLetter(codepoint)) { // TODO: try to separately specify ideographic, kana? // [currently all bundled as letter for this case] return RuleBasedBreakIterator.WORD_LETTER; } } return RuleBasedBreakIterator.WORD_NONE; }
/**
 * Unions into the given set every character this object may output:
 * for each code point of {@code output}, either the code point itself or,
 * when {@code data.lookupReplacer} returns a replacer for it, that replacer's
 * own replacement set.
 * @param toUnionTo the set into which to union the output characters
 */
@Override
public void addReplacementSetTo(UnicodeSet toUnionTo) {
    int offset = 0;
    while (offset < output.length()) {
        final int cp = UTF16.charAt(output, offset);
        final UnicodeReplacer replacer = data.lookupReplacer(cp);
        if (replacer != null) {
            replacer.addReplacementSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
        offset += UTF16.getCharCount(cp);
    }
}
} // presumably closes the enclosing class, whose header is outside this view
/**
 * Matches str against text starting at text[pos], code point by code point,
 * delegating each comparison to the single-code-point match overload.
 * Bidi marks in str are skipped. After a Pattern_White_Space code point in str
 * has been matched, the rest of that white space run in str is skipped, so a
 * run of white space in str is matched by a single call.
 *
 * @param text the text being parsed
 * @param pos the offset in text at which matching starts
 * @param str the string to match
 * @return the index of the next character in text upon success, or -1 on failure
 */
static final int match(String text, int pos, String str) {
    int i = 0;
    while (i < str.length() && pos >= 0) {
        final int ch = UTF16.charAt(str, i);
        i += UTF16.getCharCount(ch);
        if (!isBidiMark(ch)) {
            pos = match(text, pos, ch);
            if (PatternProps.isWhiteSpace(ch)) {
                i = skipPatternWhiteSpace(str, i);
            }
        }
    }
    return pos;
}