static boolean mayHaveLccc(int c) { // Handles all of Unicode 0..10FFFF. // c can be negative, e.g., Collation.SENTINEL_CP. // U+0300 is the first character with lccc!=0. if(c < 0x300) { return false; } if(c > 0xffff) { c = UTF16.getLeadSurrogate(c); } int i; return (i = lcccIndex[c >> 5]) != 0 && (lcccBits[i] & (1 << (c & 0x1f))) != 0; }
/** * Adds a codepoint to offset16 position of the argument char array. * * @param target Char array to be append with the new code point * @param limit UTF16 offset which the codepoint will be appended. * @param char32 Code point to be appended * @return offset after char32 in the array. * @exception IllegalArgumentException Thrown if there is not enough space for the append, or when char32 does not * lie within the range of the Unicode codepoints. * @stable ICU 2.1 */ public static int append(char[] target, int limit, int char32) { // Check for irregular values if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) { throw new IllegalArgumentException("Illegal codepoint"); } // Write the UTF-16 values if (char32 >= SUPPLEMENTARY_MIN_VALUE) { target[limit++] = getLeadSurrogate(char32); target[limit++] = getTrailSurrogate(char32); } else { target[limit++] = (char) char32; } return limit; }
/** * Append a single UTF-32 value to the end of a StringBuffer. If a validity check is required, * use {@link com.ibm.icu.lang.UCharacter#isLegal(int)} on char32 before * calling. * * @param target The buffer to append to * @param char32 Value to append. * @return the updated StringBuffer * @exception IllegalArgumentException Thrown when char32 does not lie within the range of the Unicode codepoints * @stable ICU 2.1 */ public static StringBuffer append(StringBuffer target, int char32) { // Check for irregular values if (char32 < CODEPOINT_MIN_VALUE || char32 > CODEPOINT_MAX_VALUE) { throw new IllegalArgumentException("Illegal codepoint: " + Integer.toHexString(char32)); } // Write the UTF-16 values if (char32 >= SUPPLEMENTARY_MIN_VALUE) { target.append(getLeadSurrogate(char32)); target.append(getTrailSurrogate(char32)); } else { target.append((char) char32); } return target; }
/**
 * <p>
 * Builds the UTF-16 String form of a code point.
 * </p>
 * <p>
 * The code point is not validated; the result is not guaranteed if an invalid code point
 * is passed in.
 * </p>
 * <p>
 * The returned string has length 1 for a non-supplementary code point and length 2
 * (lead surrogate followed by trail surrogate) otherwise.
 * </p>
 *
 * @param ch
 *            code point
 * @return string representation of the code point
 */
private static String toString(int ch) {
    if (ch >= SUPPLEMENTARY_MIN_VALUE) {
        final char[] pair = { getLeadSurrogate(ch), getTrailSurrogate(ch) };
        return new String(pair);
    }
    return String.valueOf((char) ch);
}
}
/**
 * Appends a code point to an Appendable as one or two UTF-16 code units.
 *
 * <p>An {@code IOException} raised by the Appendable is not propagated as a checked
 * exception; it is rethrown wrapped in an {@code ICUUncheckedIOException}.
 *
 * <p>TODO: create Appendable version of UTF16.append(buf, c),
 * maybe in new class Appendables?
 *
 * @param app destination to append to
 * @param c code point; asserted to be within 0..0x10ffff
 */
private static void appendCodePoint(Appendable app, int c) {
    assert c >= 0 && c <= 0x10ffff;
    try {
        if (c > 0xffff) {
            // Supplementary: lead surrogate first; the trail goes to whatever the
            // first append returned, exactly as a chained call would.
            final Appendable afterLead = app.append(UTF16.getLeadSurrogate(c));
            afterLead.append(UTF16.getTrailSurrogate(c));
        } else {
            app.append((char) c);
        }
    } catch (IOException e) {
        throw new ICUUncheckedIOException(e);
    }
}
/**
 * Writes one code point to the target buffer as UTF-16, optionally recording source
 * offsets, and stashes whatever did not fit in the char error buffer.
 *
 * <p>If the target has no room at all, the whole code point is stored in
 * {@code charErrorBufferArray}; if only the lead surrogate of a supplementary code point
 * fits, the trail surrogate is stored there instead. One offset entry is written per
 * code unit actually put into the target.
 *
 * @param c code point to write
 * @param target destination buffer
 * @param offsets offsets buffer, or null if offsets are not wanted
 * @param sourceIndex value recorded in {@code offsets} for each code unit written
 * @return {@code CoderResult.OVERFLOW} if something had to go to the error buffer,
 *         {@code CoderResult.UNDERFLOW} otherwise
 */
private CoderResult toUWriteCodePoint(int c, CharBuffer target, IntBuffer offsets, int sourceIndex) {
    final int startPosition = target.position();

    if (target.hasRemaining()) {
        if (c > 0xffff) {
            /* c is a supplementary code point */
            target.put(UTF16.getLeadSurrogate(c));
            // From here on c holds the pending trail surrogate.
            c = UTF16.getTrailSurrogate(c);
            if (target.hasRemaining()) {
                target.put((char) c);
                c = UConverterConstants.U_SENTINEL;
            }
        } else {
            target.put((char) c);
            c = UConverterConstants.U_SENTINEL;
        }

        /* write offsets: one per code unit that reached the target */
        if (offsets != null) {
            offsets.put(sourceIndex);
            if (target.position() > startPosition + 1) {
                offsets.put(sourceIndex);
            }
        }
    }

    // A non-negative c means something is still unwritten
    // (this relies on U_SENTINEL being negative).
    if (c < 0) {
        return CoderResult.UNDERFLOW;
    }
    /* write overflow from c */
    charErrorBufferLength = UTF16.append(charErrorBufferArray, 0, c);
    return CoderResult.OVERFLOW;
}
/**
 * Checks whether the iteration is at the start of an initial block, signalled by
 * {@code m_nextIndex_} being zero or negative.
 * If so, the rest of the code points in that block share the same value, so
 * {@code m_nextCodepoint_} is advanced past them and the index members are
 * recomputed from the folding offset of the resulting lead surrogate.
 * Used only for supplementary code points, because the offset to the trail
 * indexes could be 0.
 *
 * @return true if we are at the start of an initial block (state was advanced),
 *         false if nothing was changed.
 * @throws NullPointerException if the trie has no DataManipulate
 */
private final boolean checkNullNextTrailIndex() {
    if (m_nextIndex_ > 0) {
        return false;
    }

    // Skip ahead over the trail surrogates of this block.
    m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
    final int lead = UTF16.getLeadSurrogate(m_nextCodepoint_);
    final int stage1 = m_trie_.m_index_[lead >> Trie.INDEX_STAGE_1_SHIFT_];
    final int leadBlock = stage1 << Trie.INDEX_STAGE_2_SHIFT_;

    if (m_trie_.m_dataManipulate_ == null) {
        throw new NullPointerException(
                "The field DataManipulate in this Trie is null");
    }

    final int leadValue = m_trie_.getValue(leadBlock + (lead & Trie.INDEX_STAGE_3_MASK_));
    m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(leadValue);
    m_nextIndex_ -= 1;
    m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
    return true;
}
} else { target.put(UTF16.getLeadSurrogate(char32)); char32 = UTF16.getTrailSurrogate(char32); if (target.hasRemaining()) {
target.put(UTF16.getLeadSurrogate(c)); if(target.hasRemaining()) { target.put(UTF16.getTrailSurrogate(c));
if (value != getValue(UTF16.getLeadSurrogate(c))) { if (!setValue(UTF16.getLeadSurrogate(c), value)) {
/** * Internal trie getter from a code point. * Could be faster(?) but longer with * if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); } * Gets the offset to data which the codepoint points to * @param ch codepoint * @return offset to data */ protected final int getCodePointOffset(int ch) { // if ((ch >> 16) == 0) slower if (ch < 0) { return -1; } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works return getRawOffset(0, (char)ch); } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) { // BMP codepoint return getBMPOffset((char)ch); } else if (ch <= UCharacter.MAX_VALUE) { // look at the construction of supplementary characters // trail forms the ends of it. return getSurrogateOffset(UTF16.getLeadSurrogate(ch), (char)(ch & SURROGATE_MASK_)); } else { // return -1 if there is an error, in this case we return return -1; } }
if(composite>0xffff) { sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); sb.setCharAt(starter+1, UTF16.getTrailSurrogate(composite)); } else { sb.setCharAt(starter, UTF16.getLeadSurrogate(composite)); sb.insert(starter+1, UTF16.getTrailSurrogate(composite)); ++p;
/**
 * Traverses the trie from the current state for the
 * one or two UTF-16 code units for this input code point.
 * For a code point above U+FFFF the lead surrogate is consumed first; the trail
 * surrogate is consumed only if the lead's result reports {@code hasNext()}.
 * @param cp A Unicode code point 0..0x10ffff.
 * @return The match/value Result.
 * @stable ICU 4.8
 */
public Result nextForCodePoint(int cp) {
    if (cp <= 0xffff) {
        return next(cp);
    }
    final Result leadResult = next(UTF16.getLeadSurrogate(cp));
    if (!leadResult.hasNext()) {
        return Result.NO_MATCH;
    }
    return next(UTF16.getTrailSurrogate(cp));
}
/**
 * Traverses the trie from the initial state for the
 * one or two UTF-16 code units for this input code point.
 * Equivalent to reset().nextForCodePoint(cp).
 * For a code point above U+FFFF the lead surrogate is consumed first; the trail
 * surrogate is consumed only if the lead's result reports {@code hasNext()}.
 * @param cp A Unicode code point 0..0x10ffff.
 * @return The match/value Result.
 * @stable ICU 4.8
 */
public Result firstForCodePoint(int cp) {
    if (cp <= 0xffff) {
        return first(cp);
    }
    final Result leadResult = first(UTF16.getLeadSurrogate(cp));
    if (!leadResult.hasNext()) {
        return Result.NO_MATCH;
    }
    return next(UTF16.getTrailSurrogate(cp));
}
} else { dest.insert(codeUnitIndex, UTF16.getLeadSurrogate(n)); dest.insert(codeUnitIndex+1, UTF16.getTrailSurrogate(n));
int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_);