static synchronized Token getGraphemePattern() { if (Token.token_grapheme != null) return Token.token_grapheme; Token base_char = Token.createRange(); // [{ASSIGNED}]-[{M},{C}] base_char.mergeRanges(Token.getRange("ASSIGNED", true)); base_char.subtractRanges(Token.getRange("M", true)); base_char.subtractRanges(Token.getRange("C", true)); Token virama = Token.createRange(); for (int i = 0; i < Token.viramaString.length(); i ++) { virama.addRange(i, i); } Token combiner_wo_virama = Token.createRange(); combiner_wo_virama.mergeRanges(Token.getRange("M", true)); combiner_wo_virama.addRange(0x1160, 0x11ff); // hangul_medial and hangul_final combiner_wo_virama.addRange(0xff9e, 0xff9f); // extras Token left = Token.createUnion(); // base_char? left.addChild(base_char); left.addChild(Token.token_empty); Token foo = Token.createUnion(); foo.addChild(Token.createConcat(virama, Token.getRange("L", true))); foo.addChild(combiner_wo_virama); foo = Token.createClosure(foo); foo = Token.createConcat(left, foo); Token.token_grapheme = foo; return Token.token_grapheme; }
static synchronized Token getGraphemePattern() { if (Token.token_grapheme != null) return Token.token_grapheme; Token base_char = Token.createRange(); // [{ASSIGNED}]-[{M},{C}] base_char.mergeRanges(Token.getRange("ASSIGNED", true)); base_char.subtractRanges(Token.getRange("M", true)); base_char.subtractRanges(Token.getRange("C", true)); Token virama = Token.createRange(); for (int i = 0; i < Token.viramaString.length(); i++) { virama.addRange(i, i); } Token combiner_wo_virama = Token.createRange(); combiner_wo_virama.mergeRanges(Token.getRange("M", true)); combiner_wo_virama.addRange(0x1160, 0x11ff); // hangul_medial and hangul_final combiner_wo_virama.addRange(0xff9e, 0xff9f); // extras Token left = Token.createUnion(); // base_char? left.addChild(base_char); left.addChild(Token.token_empty); Token foo = Token.createUnion(); foo.addChild(Token.createConcat(virama, Token.getRange("L", true))); foo.addChild(combiner_wo_virama); foo = Token.createClosure(foo); foo = Token.createConcat(left, foo); Token.token_grapheme = foo; return Token.token_grapheme; }
/** * regex ::= term (`|` term)* * term ::= factor+ * factor ::= ('^' | '$' | '\A' | '\Z' | '\z' | '\b' | '\B' | '\<' | '\>' * | atom (('*' | '+' | '?' | minmax ) '?'? )?) * | '(?=' regex ')' | '(?!' regex ')' | '(?<=' regex ')' | '(?<!' regex ')' * atom ::= char | '.' | range | '(' regex ')' | '(?:' regex ')' | '\' [0-9] * | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block */ Token parseRegex() throws ParseException { Token tok = this.parseTerm(); Token parent = null; while (this.read() == T_OR) { this.next(); // '|' if (parent == null) { parent = Token.createUnion(); parent.addChild(tok); tok = parent; } tok.addChild(this.parseTerm()); } return tok; }
/** * term ::= factor+ */ Token parseTerm() throws ParseException { int ch = this.read(); if (ch == T_OR || ch == T_RPAREN || ch == T_EOF) { return Token.createEmpty(); } else { Token tok = this.parseFactor(); Token concat = null; while ((ch = this.read()) != T_OR && ch != T_RPAREN && ch != T_EOF) { if (concat == null) { concat = Token.createConcat(); concat.addChild(tok); tok = concat; } concat.addChild(this.parseFactor()); //tok = Token.createConcat(tok, this.parseFactor()); } return tok; } }
/** * term ::= factor+ */ Token parseTerm() throws ParseException { int ch = this.read(); if (ch == T_OR || ch == T_RPAREN || ch == T_EOF) { return Token.createEmpty(); } else { Token tok = this.parseFactor(); Token concat = null; while ((ch = this.read()) != T_OR && ch != T_RPAREN && ch != T_EOF) { if (concat == null) { concat = Token.createConcat(); concat.addChild(tok); tok = concat; } concat.addChild(this.parseFactor()); //tok = Token.createConcat(tok, this.parseFactor()); } return tok; } }
/** * regex ::= term (`|` term)* * term ::= factor+ * factor ::= ('^' | '$' | '\A' | '\Z' | '\z' | '\b' | '\B' | '\<' | '\>' * | atom (('*' | '+' | '?' | minmax ) '?'? )?) * | '(?=' regex ')' | '(?!' regex ')' | '(?<=' regex ')' | '(?<!' regex ')' * atom ::= char | '.' | range | '(' regex ')' | '(?:' regex ')' | '\' [0-9] * | '\w' | '\W' | '\d' | '\D' | '\s' | '\S' | category-block */ Token parseRegex() throws ParseException { Token tok = this.parseTerm(); Token parent = null; while (this.read() == T_OR) { this.next(); // '|' if (parent == null) { parent = Token.createUnion(); parent.addChild(tok); tok = parent; } tok.addChild(this.parseTerm()); } return tok; }
Token processQuestion(Token tok) throws ParseException { // X? -> X| this.next(); Token par = Token.createUnion(); if (this.read() == T_QUESTION) { this.next(); par.addChild(Token.createEmpty()); par.addChild(tok); } else { par.addChild(tok); par.addChild(Token.createEmpty()); } return par; } boolean checkQuestion(int off) {
Token processQuestion(Token tok) throws ParseException { // X? -> X| this.next(); Token par = Token.createUnion(); if (this.read() == T_QUESTION) { this.next(); par.addChild(Token.createEmpty()); par.addChild(tok); } else { par.addChild(tok); par.addChild(Token.createEmpty()); } return par; } boolean checkQuestion(int off) {