/**
 * Tells whether the given code point is a latin digit.
 * Digits from other scripts are excluded because Java does not consistently parse them into numbers.
 *
 * @param c the code point to test
 * @return true if {@code c} is a digit according to {@link Character#isDigit(int)} and is also latin
 */
public boolean isLatinDigit(int c) {
    // Only consult isLatin once we know this is a digit at all
    if (!Character.isDigit(c)) {
        return false;
    }
    return isLatin(c);
}
/** * Returns true for code points which are letters in unicode 3 or 4, plus some additional characters * which are useful to view as letters even though not defined as such in unicode. */ public boolean isLetter(int c) { if (java.lang.Character.isLetter(c)) return true; if (Character.isDigit(c) && ! isLatin(c)) return true; // Not considering these digits, so treat them as letters // if (c == '_') return true; // Ticket 3864695, some CJK punctuation YST defined as word characters if (c == '\u3008' || c == '\u3009' || c == '\u300a' || c == '\u300b' || c == '\u300c' || c == '\u300d' || c == '\u300e' || c == '\u300f' || c == '\u3010' || c == '\u3011') { return true; } int type = java.lang.Character.getType(c); return type == java.lang.Character.NON_SPACING_MARK || type == java.lang.Character.COMBINING_SPACING_MARK || type == java.lang.Character.ENCLOSING_MARK; }