/**
 * Populates {@code range} from a string of packed character pairs.
 *
 * <p>{@code src} is read two characters at a time; for each pair the first
 * character and the second character are handed to
 * {@code range.addRange(first, second)} (presumably the inclusive start and
 * end of an interval; confirm against {@code Token.addRange}).
 *
 * @param range the token that receives one {@code addRange} call per pair
 * @param src   the packed pairs; expected to have an even length
 * @throws StringIndexOutOfBoundsException if {@code src} has an odd length
 *         (raised when the unpaired last character is reached; the pairs
 *         before it have already been added by then)
 */
static void setupRange(Token range, String src) {
    final int length = src.length();
    int pos = 0;
    while (pos < length) {
        final char first = src.charAt(pos);
        final char second = src.charAt(pos + 1);
        range.addRange(first, second);
        pos += 2;
    }
}
/**
 * Populates {@code range} from a string of packed character pairs.
 *
 * <p>{@code src} is read two characters at a time; for each pair the first
 * character and the second character are handed to
 * {@code range.addRange(first, second)} (presumably the inclusive start and
 * end of an interval; confirm against {@code Token.addRange}).
 *
 * @param range the token that receives one {@code addRange} call per pair
 * @param src   the packed pairs; expected to have an even length
 * @throws StringIndexOutOfBoundsException if {@code src} has an odd length
 *         (raised when the unpaired last character is reached; the pairs
 *         before it have already been added by then)
 */
static void setupRange(Token range, String src) {
    final int length = src.length();
    int pos = 0;
    while (pos < length) {
        final char first = src.charAt(pos);
        final char second = src.charAt(pos + 1);
        range.addRange(first, second);
        pos += 2;
    }
}
ranges[type].addRange(i, i); switch (type) { case Character.UPPERCASE_LETTER: ranges[type].addRange(i, i); ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX); ranges[i].addRange(0x10000, Token.UTF16_MAX); r1.addRange(rstart, rend); } else { location = (i - NONBMP_BLOCK_START) * 2; r1.addRange(Token.nonBMPBlockRanges[location], Token.nonBMPBlockRanges[location + 1]); r1.addRange(0xfff0, 0xfffd); if (n.equals("Private Use")) { r1.addRange(0xF0000,0xFFFFD); r1.addRange(0x100000,0x10FFFD); Token.setAlias("UNASSIGNED", "Cn", true); Token all = Token.createRange(); all.addRange(0, Token.UTF16_MAX); Token.categories.put("ALL", all); Token.categories2.put("ALL", Token.complementRanges(all)); isword.addRange('_', '_'); Token.categories.put("IsWord", isword);
ranges[type].addRange(i, i); switch (type) { case Character.UPPERCASE_LETTER: ranges[type].addRange(i, i); ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX); ranges[i].addRange(0x10000, Token.UTF16_MAX); r1.addRange(rstart, rend); } else { location = (i - NONBMP_BLOCK_START) * 2; r1.addRange(Token.nonBMPBlockRanges[location], Token.nonBMPBlockRanges[location + 1]); r1.addRange(0xfff0, 0xfffd); if (n.equals("Private Use")) { r1.addRange(0xF0000,0xFFFFD); r1.addRange(0x100000,0x10FFFD); Token.setAlias("UNASSIGNED", "Cn", true); Token all = Token.createRange(); all.addRange(0, Token.UTF16_MAX); Token.categories.put("ALL", all); Token.categories2.put("ALL", Token.complementRanges(all)); isword.addRange('_', '_'); Token.categories.put("IsWord", isword);
static synchronized Token getGraphemePattern() { if (Token.token_grapheme != null) return Token.token_grapheme; Token base_char = Token.createRange(); // [{ASSIGNED}]-[{M},{C}] base_char.mergeRanges(Token.getRange("ASSIGNED", true)); base_char.subtractRanges(Token.getRange("M", true)); base_char.subtractRanges(Token.getRange("C", true)); Token virama = Token.createRange(); for (int i = 0; i < Token.viramaString.length(); i ++) { virama.addRange(i, i); } Token combiner_wo_virama = Token.createRange(); combiner_wo_virama.mergeRanges(Token.getRange("M", true)); combiner_wo_virama.addRange(0x1160, 0x11ff); // hangul_medial and hangul_final combiner_wo_virama.addRange(0xff9e, 0xff9f); // extras Token left = Token.createUnion(); // base_char? left.addChild(base_char); left.addChild(Token.token_empty); Token foo = Token.createUnion(); foo.addChild(Token.createConcat(virama, Token.getRange("L", true))); foo.addChild(combiner_wo_virama); foo = Token.createClosure(foo); foo = Token.createConcat(left, foo); Token.token_grapheme = foo; return Token.token_grapheme; }
static synchronized Token getGraphemePattern() { if (Token.token_grapheme != null) return Token.token_grapheme; Token base_char = Token.createRange(); // [{ASSIGNED}]-[{M},{C}] base_char.mergeRanges(Token.getRange("ASSIGNED", true)); base_char.subtractRanges(Token.getRange("M", true)); base_char.subtractRanges(Token.getRange("C", true)); Token virama = Token.createRange(); for (int i = 0; i < Token.viramaString.length(); i++) { virama.addRange(i, i); } Token combiner_wo_virama = Token.createRange(); combiner_wo_virama.mergeRanges(Token.getRange("M", true)); combiner_wo_virama.addRange(0x1160, 0x11ff); // hangul_medial and hangul_final combiner_wo_virama.addRange(0xff9e, 0xff9f); // extras Token left = Token.createUnion(); // base_char? left.addChild(base_char); left.addChild(Token.token_empty); Token foo = Token.createUnion(); foo.addChild(Token.createConcat(virama, Token.getRange("L", true))); foo.addChild(combiner_wo_virama); foo = Token.createClosure(foo); foo = Token.createConcat(left, foo); Token.token_grapheme = foo; return Token.token_grapheme; }