/**
 * Builds the variant of the grammar in which immediate left recursion has been removed.
 *
 * <p>To eliminate immediate left recursion - factor out non recursive alternatives:
 * rule {@code A} is defined as one of {@code S1}/{@code S2} followed by zero or more
 * occurrences of one of {@code T1}/{@code T2}.
 *
 * @return the grammar produced by the builder after {@code otherRules} has been applied
 */
public static Grammar eliminatedImmediateLeftRecursion() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  // Non-recursive alternatives come first, followed by a repetition of the tails.
  builder.rule(A).is(
    builder.firstOf(S1, S2),
    builder.zeroOrMore(builder.firstOf(T1, T2)));

  otherRules(builder);
  return builder.build();
}
/**
 * Declares one rule per {@code FlexKeyword} plus the aggregate {@code KEYWORDS} rule.
 *
 * <p>Each individual rule is {@code SPACING}, the keyword's value, and a negative
 * lookahead on {@code IDENTIFIER_PART}. {@code KEYWORDS} is a choice between every
 * element returned by {@code FlexKeyword.keywords()}.
 *
 * @param b builder receiving the rule definitions
 */
private static void keywords(LexerlessGrammarBuilder b) {
  for (FlexKeyword keyword : FlexKeyword.values()) {
    b.rule(keyword).is(SPACING, keyword.getValue(), b.nextNot(IDENTIFIER_PART));
  }

  List<FlexKeyword> keywordList = FlexKeyword.keywords();
  int total = keywordList.size();

  // firstOf takes two mandatory alternatives plus a varargs tail, so the list is
  // split into its first two elements and an array holding everything after them.
  Object[] tail = new Object[total - 2];
  keywordList.subList(2, total).toArray(tail);

  b.rule(KEYWORDS).is(b.firstOf(keywordList.get(0), keywordList.get(1), tail));
}
/**
 * Builds a grammar with a single rule {@code A}: the character {@code 'a'} followed by
 * either {@code A 'b'} or {@code A 'c'}.
 *
 * <p>Both alternatives begin with the same sub-rule {@code A}; going by the method name,
 * this is the shape intended to exercise negative memoization.
 *
 * @return the built grammar
 */
public static Grammar requiresNegativeMemoization() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  builder.rule(A).is(
    'a',
    builder.firstOf(
      builder.sequence(A, 'b'),
      builder.sequence(A, 'c')));

  return builder.build();
}
/**
 * Builds the variant of the grammar in which indirect left recursion has been removed.
 *
 * <p>To eliminate indirect left recursion - transform to immediate left recursion, then
 * factor out non recursive alternatives: rule {@code A} is defined as either
 * {@code S2 T1} or {@code S1}, followed by zero or more occurrences of {@code T2 T1}.
 *
 * @return the grammar produced by the builder after {@code otherRules} has been applied
 */
public static Grammar eliminatedIndirectLeftRecursion() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  builder.rule(A).is(
    builder.firstOf(
      builder.sequence(S2, T1),
      S1),
    builder.zeroOrMore(T2, T1));

  otherRules(builder);
  return builder.build();
}
/**
 * Declares the rules for spec/type attributes and function declarations.
 *
 * <p>{@code spec} and {@code typeSpec} share one shape: {@code minus}, one of two
 * semi-keywords, a repetition that consumes the attribute's content, and a closing
 * {@code dot}. The remaining rules describe function declarations as clauses
 * separated by {@code semi} and ended by {@code dot}.
 *
 * @param b builder receiving the rule definitions
 */
private static void functions(LexerlessGrammarBuilder b) {
  // A dot followed by either a run of dots or any one character matched by '.'.
  final String dotFollowedByMore = "\\.(\\.+|.)";
  // Any single character other than a dot.
  final String anyButDot = "[^\\.]";

  b.rule(spec).is(
    minus,
    b.firstOf(semiKeyword("spec", b), semiKeyword("callback", b)),
    b.zeroOrMore(b.firstOf(b.regexp(dotFollowedByMore), b.regexp(anyButDot)), spacing),
    dot);

  b.rule(typeSpec).is(
    minus,
    b.firstOf(semiKeyword("type", b), semiKeyword("opaque", b)),
    b.zeroOrMore(b.firstOf(b.regexp(dotFollowedByMore), b.regexp(anyButDot)), spacing),
    dot);

  // One or more clauses separated by semi, terminated by dot.
  b.rule(functionDeclaration).is(
    functionClause,
    b.zeroOrMore(semi, functionClause),
    dot);
  b.rule(functionClause).is(clauseHead, arrow, clauseBody);
  b.rule(clauseHead).is(funcDecl, b.optional(guardSequenceStart));
  b.rule(clauseBody).is(statements);

  // Optional "literal colon" prefix, then literal, div, literal.
  b.rule(funcArity).is(b.optional(literal, colon), literal, div, literal);
  b.rule(funcDecl).is(literal, arguments);
}
/**
 * Builds the grammar whose rule {@code A} is immediately left recursive: its first two
 * alternatives, {@code A T1} and {@code A T2}, start with {@code A} itself, and the
 * last two are {@code S1} and {@code S2}.
 *
 * @return the grammar produced by the builder after {@code otherRules} has been applied
 * @see #eliminatedImmediateLeftRecursion()
 */
public static Grammar immediateLeftRecursion() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  builder.rule(A).is(
    builder.firstOf(
      builder.sequence(A, T1),
      builder.sequence(A, T2),
      S1,
      S2));

  otherRules(builder);
  return builder.build();
}
/**
 * Builds a grammar with two rules: {@code A} is either {@code B? 'a'} or {@code B? 'b'},
 * and {@code B} is {@code '(' A ')'}.
 *
 * <p>Both alternatives of {@code A} start with the same optional {@code B}; going by the
 * method name, this is the shape intended to exercise positive memoization.
 *
 * @return the built grammar
 */
public static Grammar requiresPositiveMemoization() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  builder.rule(A).is(
    builder.firstOf(
      builder.sequence(builder.optional(B), 'a'),
      builder.sequence(builder.optional(B), 'b')));
  builder.rule(B).is('(', A, ')');

  return builder.build();
}
/**
 * Declares {@code LETTER_OR_DIGIT}, one rule per {@code JavaKeyword}, and the aggregate
 * {@code KEYWORD} rule.
 *
 * <p>Each keyword rule is the keyword's value, a negative lookahead on
 * {@code LETTER_OR_DIGIT}, then {@code SPACING}. {@code KEYWORD} is a choice between all
 * keyword values followed by the same negative lookahead.
 *
 * @param b builder receiving the rule definitions
 */
private static void keywords(LexerlessGrammarBuilder b) {
  b.rule(LETTER_OR_DIGIT).is(javaIdentifierPart(b));

  for (JavaKeyword keyword : JavaKeyword.values()) {
    b.rule(keyword).is(keyword.getValue(), b.nextNot(LETTER_OR_DIGIT), SPACING);
  }

  // Descending order places a longer keyword ahead of any keyword that is its prefix.
  String[] descending = JavaKeyword.keywordValues();
  Arrays.sort(descending);
  ArrayUtils.reverse(descending);

  // firstOf takes two mandatory alternatives plus a varargs tail.
  b.rule(KEYWORD).is(
    b.firstOf(
      descending[0],
      descending[1],
      Arrays.copyOfRange(descending, 2, descending.length)),
    b.nextNot(LETTER_OR_DIGIT));
}
/**
 * Registers the identifier-part rule, a rule for every {@code JavaKeyword}, and the
 * combined {@code KEYWORD} rule.
 *
 * <p>A single keyword is matched as its value, not followed by {@code LETTER_OR_DIGIT},
 * then {@code SPACING}. The combined rule is an ordered choice over all keyword values,
 * also guarded by a negative lookahead on {@code LETTER_OR_DIGIT}.
 *
 * @param b builder receiving the rule definitions
 */
private static void keywords(LexerlessGrammarBuilder b) {
  b.rule(LETTER_OR_DIGIT).is(javaIdentifierPart(b));

  for (JavaKeyword javaKeyword : JavaKeyword.values()) {
    b.rule(javaKeyword).is(
      javaKeyword.getValue(),
      b.nextNot(LETTER_OR_DIGIT),
      SPACING);
  }

  // Sort ascending, then reverse: a keyword that extends another one (same prefix,
  // more characters) ends up before the shorter one in the choice.
  String[] values = JavaKeyword.keywordValues();
  Arrays.sort(values);
  ArrayUtils.reverse(values);

  // The first two values fill firstOf's mandatory slots; the rest go in as varargs.
  b.rule(KEYWORD).is(
    b.firstOf(values[0], values[1], Arrays.copyOfRange(values, 2, values.length)),
    b.nextNot(LETTER_OR_DIGIT));
}
public static Grammar requiresPositiveMemoizationOnMoreThanJustLastRule() { LexerlessGrammarBuilder b = LexerlessGrammarBuilder.create(); b.rule(A).is( b.firstOf( b.sequence(b.optional(B), 'a'), // rule 'C' will match and override the memoization result of 'B': b.sequence(C, '!'), b.sequence(b.optional(B), 'b'))); b.rule(B).is('(', A, ')'); // rule 'C' will override each following memoization result of 'A': b.rule(C).is('(', b.optional(C)); return b.build(); }
/**
 * Declares the rules used to find CSS embedded in HTML {@code <style>} elements.
 *
 * <p>Three rules are defined, each as {@code SPACING} followed by a
 * {@code GenericTokenType.LITERAL} token:
 * <ul>
 *   <li>{@code OPENING_HTML_STYLE_TAG}: a {@code <style ...>} tag carrying
 *       {@code type="text/css"}</li>
 *   <li>{@code CLOSING_HTML_STYLE_TAG}: {@code </style>}, whitespace allowed before
 *       {@code >}</li>
 *   <li>{@code NON_CSS_TOKEN}: text up to the next opening style tag, or otherwise
 *       whatever {@code .+} matches</li>
 * </ul>
 *
 * @param b builder receiving the rule definitions
 */
private static void embeddedCss(LexerlessGrammarBuilder b) {
  String styleOpenTag = "<style[^>]+type[\\s]*=[\\s]*\"text/css\"[^>]*>";
  // Reluctant match that stops right before an opening style tag (lookahead only).
  String textBeforeStyleOpenTag = ".+?(?=" + styleOpenTag + ")";

  b.rule(OPENING_HTML_STYLE_TAG).is(
    SPACING,
    b.token(GenericTokenType.LITERAL, b.regexp(styleOpenTag)));

  b.rule(CLOSING_HTML_STYLE_TAG).is(
    SPACING,
    b.token(GenericTokenType.LITERAL, b.regexp("</style[\\s]*>")));

  b.rule(NON_CSS_TOKEN).is(
    SPACING,
    b.token(
      GenericTokenType.LITERAL,
      b.firstOf(
        b.regexp(textBeforeStyleOpenTag),
        b.regexp(".+"))));
}
/**
 * Creates a builder pre-populated with the rules of an arithmetic expression grammar
 * and with {@code EXPRESSION} set as the root rule.
 *
 * <p>Every terminal rule consumes trailing {@code WHITESPACE}. {@code EXPRESSION}
 * consumes leading whitespace, one additive expression, and then requires end of input.
 *
 * @return the configured builder (not yet built)
 */
public static LexerlessGrammarBuilder createGrammarBuilder() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  // Whitespace is registered as comment trivia and the rule itself is skipped.
  builder.rule(WHITESPACE).is(builder.commentTrivia(builder.regexp("\\s*+"))).skip();

  // Operators and operands, each followed by optional whitespace.
  builder.rule(PLUS).is('+', WHITESPACE);
  builder.rule(MINUS).is('-', WHITESPACE);
  builder.rule(DIV).is('/', WHITESPACE);
  builder.rule(MUL).is('*', WHITESPACE);
  builder.rule(NUMBER).is(builder.regexp("[0-9]++"), WHITESPACE);
  builder.rule(VARIABLE).is(
    builder.regexp("\\p{javaJavaIdentifierStart}++\\p{javaJavaIdentifierPart}*+"),
    WHITESPACE);
  builder.rule(LPAR).is('(', WHITESPACE);
  builder.rule(RPAR).is(')', WHITESPACE);

  // Expression structure: additive over multiplicative over primary.
  builder.rule(EXPRESSION).is(WHITESPACE, ADDITIVE_EXPRESSION, builder.endOfInput());
  builder.rule(ADDITIVE_EXPRESSION).is(
    MULTIPLICATIVE_EXPRESSION,
    builder.zeroOrMore(builder.firstOf(PLUS, MINUS), MULTIPLICATIVE_EXPRESSION));
  builder.rule(MULTIPLICATIVE_EXPRESSION)
    .is(PRIMARY, builder.zeroOrMore(builder.firstOf(DIV, MUL), PRIMARY))
    .skipIfOneChild();
  builder.rule(PRIMARY)
    .is(builder.firstOf(NUMBER, PARENS, VARIABLE))
    .skipIfOneChild();
  builder.rule(PARENS).is(LPAR, ADDITIVE_EXPRESSION, RPAR);

  builder.setRootRule(EXPRESSION);
  return builder;
}
/**
 * Builds the JSON grammar.
 *
 * <p>The top-level {@code JSON} rule is either an {@code ARRAY} or an {@code OBJECT}.
 * Structural characters and values consume trailing {@code WHITESPACE}.
 *
 * @return the built grammar
 */
public static Grammar create() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  builder.rule(JSON).is(builder.firstOf(ARRAY, OBJECT));

  // Object: braces around an optional comma-separated list of pairs.
  builder.rule(OBJECT).is(
    "{", WHITESPACE,
    builder.optional(PAIR, builder.zeroOrMore(",", WHITESPACE, PAIR)),
    "}", WHITESPACE);
  builder.rule(PAIR).is(STRING, ":", WHITESPACE, VALUE);

  // Array: brackets around an optional comma-separated list of values.
  builder.rule(ARRAY).is(
    "[", WHITESPACE,
    builder.optional(VALUE, builder.zeroOrMore(",", WHITESPACE, VALUE)),
    "]", WHITESPACE);

  // String body: any run of non-quote/non-backslash characters or escape sequences.
  builder.rule(STRING).is(
    '"',
    builder.regexp("([^\"\\\\]|\\\\([\"\\\\/bfnrt]|u[0-9a-fA-F]{4}))*+"),
    '"',
    WHITESPACE);

  builder.rule(VALUE).is(
    builder.firstOf(STRING, NUMBER, OBJECT, ARRAY, TRUE, FALSE, NULL),
    WHITESPACE);

  // Number: optional sign, integer part, optional fraction, optional exponent.
  builder.rule(NUMBER).is(
    builder.regexp("-?+(0|[1-9][0-9]*+)(\\.[0-9]++)?+([eE][+-]?+[0-9]++)?+"));

  builder.rule(TRUE).is("true");
  builder.rule(FALSE).is("false");
  builder.rule(NULL).is("null");

  builder.rule(WHITESPACE).is(builder.regexp("[ \n\r\t\f]*+"));

  return builder.build();
}
/**
 * Declares {@code LETTER_OR_DIGIT}, one rule per {@code EcmaScriptKeyword}, and the
 * aggregate {@code KEYWORD} rule.
 *
 * <p>Each keyword rule is {@code SPACING}, the keyword's value, and a negative lookahead
 * on {@code LETTER_OR_DIGIT}. {@code KEYWORD} is a choice whose first two alternatives
 * come from {@code EcmaScriptKeyword.keywordValues()} and whose remaining alternatives
 * are the values of the enum constants from index 2 onwards.
 *
 * <p>NOTE(review): the alternatives are tried in enum order. If one keyword is a prefix
 * of a later one (for instance "in" / "instanceof"), the shorter literal would presumably
 * match first and the trailing negative lookahead would then reject the input - confirm
 * against the enum's declaration order.
 *
 * @param b builder receiving the rule definitions
 */
private static void keywords(LexerlessGrammarBuilder b) {
  b.rule(LETTER_OR_DIGIT).is(b.regexp("\\p{javaJavaIdentifierPart}"));

  EcmaScriptKeyword[] allKeywords = EcmaScriptKeyword.values();
  // firstOf takes two mandatory alternatives; everything past them goes in this array.
  Object[] tail = new Object[allKeywords.length - 2];

  int position = 0;
  for (EcmaScriptKeyword keyword : allKeywords) {
    b.rule(keyword).is(SPACING, keyword.getValue(), b.nextNot(LETTER_OR_DIGIT));
    if (position >= 2) {
      tail[position - 2] = keyword.getValue();
    }
    position++;
  }

  b.rule(KEYWORD).is(
    b.firstOf(
      EcmaScriptKeyword.keywordValues()[0],
      EcmaScriptKeyword.keywordValues()[1],
      tail),
    b.nextNot(LETTER_OR_DIGIT));
}
private static void literals(LexerlessGrammarBuilder b) { b.rule(STRING).is(SPACING, b.regexp(STRING_REGEXP)); b.rule(HEXADECIMAL).is(SPACING, b.regexp("0[xX][0-9a-fA-F]++")); b.rule(OCTAL).is(SPACING, b.regexp("0[0-7]++")); b.rule(DECIMAL).is(SPACING, b.regexp(DECIMAL_REGEXP)); b.rule(NUMBER).is(b.firstOf(OCTAL, DECIMAL, HEXADECIMAL)); // Regular expression according to ECMA 262 b.rule(REGULAR_EXPRESSION).is(SPACING, b.regexp( "/" // Regular expression first char + "([^\\n\\r\\*\\\\/]|(\\\\[^\\n\\r]))" // Regular expression chars + "([^\\n\\r\\\\/]|(\\\\[^\\n\\r]))*" + "/" // Regular expression flags + IDENTIFIER_PART_REGEXP + "*+")); }
private static void keywords(LexerlessGrammarBuilder b) { Object[] rest = new Object[PHPKeyword.values().length - 2]; for (int i = 0; i < PHPKeyword.values().length; i++) { PHPKeyword tokenType = PHPKeyword.values()[i]; // PHP keywords are case insensitive b.rule(tokenType).is(SPACING, b.regexp("(?i)" + tokenType.getValue()), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART))).skip(); if (i > 1) { rest[i - 2] = b.regexp("(?i)" + tokenType.getValue()); } } b.rule(KEYWORDS).is(SPACING, b.firstOf( PHPKeyword.getKeywordValues()[0], PHPKeyword.getKeywordValues()[1], rest), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART)) ); }
private static void keywords(LexerlessGrammarBuilder b) { Object[] rest = new Object[PHPKeyword.values().length - 2]; for (int i = 0; i < PHPKeyword.values().length; i++) { PHPKeyword tokenType = PHPKeyword.values()[i]; // PHP keywords are case insensitive b.rule(tokenType).is(SPACING, b.regexp("(?i)" + tokenType.getValue()), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART))).skip(); if (i > 1) { rest[i - 2] = b.regexp("(?i)" + tokenType.getValue()); } } b.rule(KEYWORDS).is(SPACING, b.firstOf( PHPKeyword.getKeywordValues()[0], PHPKeyword.getKeywordValues()[1], rest), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART)) ); }
/**
 * 3.10. Literals
 *
 * <p>Declares {@code SPACING}, {@code EOF}, the literal rules and the identifier rule.
 * Every literal rule, and the identifier rule, ends with {@code SPACING}.
 *
 * @param b builder receiving the rule definitions
 */
private static void literals(LexerlessGrammarBuilder b) {
  // Skipped whitespace, then any number of (comment, skipped whitespace) pairs.
  b.rule(SPACING).is(
    b.skippedTrivia(whitespace(b)),
    b.zeroOrMore(
      b.commentTrivia(b.firstOf(inlineComment(b), multilineComment(b))),
      b.skippedTrivia(whitespace(b))));

  b.rule(EOF).is(b.token(GenericTokenType.EOF, b.endOfInput()));

  b.rule(CHARACTER_LITERAL).is(characterLiteral(b), SPACING);
  b.rule(JavaTokenType.LITERAL).is(stringLiteral(b), SPACING);

  // Float: floating form with a mandatory f/F suffix, or plain digits followed by f/F.
  String floatPattern = FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[fF]|[0-9][0-9_]*+[fF]";
  b.rule(FLOAT_LITERAL).is(b.regexp(floatPattern), SPACING);

  // Double: floating form with an optional d/D suffix, or plain digits followed by d/D.
  String doublePattern = FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[dD]?+|[0-9][0-9_]*+[dD]";
  b.rule(DOUBLE_LITERAL).is(b.regexp(doublePattern), SPACING);

  // Long: an integer literal with a mandatory l/L suffix.
  String longPattern = INTEGER_LITERAL_REGEXP + "[lL]";
  b.rule(LONG_LITERAL).is(b.regexp(longPattern), SPACING);
  b.rule(INTEGER_LITERAL).is(b.regexp(INTEGER_LITERAL_REGEXP), SPACING);

  // Identifier: allowed when ENUM comes next, or when no KEYWORD comes next.
  b.rule(JavaTokenType.IDENTIFIER).is(
    b.firstOf(b.next(ENUM), b.nextNot(KEYWORD)),
    javaIdentifier(b),
    SPACING);
}
/**
 * 3.10. Literals
 *
 * <p>Declares {@code SPACING}, {@code EOF}, the literal rules and the identifier rule.
 * Every literal rule, and the identifier rule, ends with {@code SPACING}.
 *
 * @param b builder receiving the rule definitions
 */
private static void literals(LexerlessGrammarBuilder b) {
  // Skipped whitespace, then any number of (comment, skipped whitespace) pairs.
  b.rule(SPACING).is(
    b.skippedTrivia(whitespace(b)),
    b.zeroOrMore(
      b.commentTrivia(b.firstOf(inlineComment(b), multilineComment(b))),
      b.skippedTrivia(whitespace(b))));

  b.rule(EOF).is(b.token(GenericTokenType.EOF, b.endOfInput()));

  b.rule(CHARACTER_LITERAL).is(characterLiteral(b), SPACING);
  b.rule(JavaTokenType.STRING_LITERAL).is(stringLiteral(b), SPACING);

  // Float: floating form with a mandatory f/F suffix, or plain digits followed by f/F.
  String floatPattern = FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[fF]|[0-9][0-9_]*+[fF]";
  b.rule(FLOAT_LITERAL).is(b.regexp(floatPattern), SPACING);

  // Double: floating form with an optional d/D suffix, or plain digits followed by d/D.
  String doublePattern = FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[dD]?+|[0-9][0-9_]*+[dD]";
  b.rule(DOUBLE_LITERAL).is(b.regexp(doublePattern), SPACING);

  // Long: an integer literal with a mandatory l/L suffix.
  String longPattern = INTEGER_LITERAL_REGEXP + "[lL]";
  b.rule(LONG_LITERAL).is(b.regexp(longPattern), SPACING);
  b.rule(INTEGER_LITERAL).is(b.regexp(INTEGER_LITERAL_REGEXP), SPACING);

  // Identifier: allowed when ENUM comes next, or when no KEYWORD comes next.
  b.rule(JavaTokenType.IDENTIFIER).is(
    b.firstOf(b.next(ENUM), b.nextNot(KEYWORD)),
    javaIdentifier(b),
    SPACING);
}