/**
 * Builds a lexerless grammar for JSON text.
 *
 * <p>The {@code JSON} rule accepts an array or an object. Whitespace is consumed after
 * punctuation, strings and values rather than in front of them. No root rule is set on the
 * builder before it is built.
 *
 * @return the grammar produced by the builder
 */
public static Grammar create() {
  final String stringBodyRegexp = "([^\"\\\\]|\\\\([\"\\\\/bfnrt]|u[0-9a-fA-F]{4}))*+";
  final String numberRegexp = "-?+(0|[1-9][0-9]*+)(\\.[0-9]++)?+([eE][+-]?+[0-9]++)?+";
  final String whitespaceRegexp = "[ \n\r\t\f]*+";
  final char quote = '"';

  LexerlessGrammarBuilder grammar = LexerlessGrammarBuilder.create();

  // Top level: a document is either an array or an object.
  grammar.rule(JSON).is(grammar.firstOf(ARRAY, OBJECT));

  // Containers: an optional comma-separated list between the delimiters.
  grammar.rule(OBJECT).is(
    "{", WHITESPACE,
    grammar.optional(PAIR, grammar.zeroOrMore(",", WHITESPACE, PAIR)),
    "}", WHITESPACE);
  grammar.rule(PAIR).is(STRING, ":", WHITESPACE, VALUE);
  grammar.rule(ARRAY).is(
    "[", WHITESPACE,
    grammar.optional(VALUE, grammar.zeroOrMore(",", WHITESPACE, VALUE)),
    "]", WHITESPACE);

  // Scalars.
  grammar.rule(STRING).is(quote, grammar.regexp(stringBodyRegexp), quote, WHITESPACE);
  grammar.rule(VALUE).is(
    grammar.firstOf(STRING, NUMBER, OBJECT, ARRAY, TRUE, FALSE, NULL),
    WHITESPACE);
  grammar.rule(NUMBER).is(grammar.regexp(numberRegexp));
  grammar.rule(TRUE).is("true");
  grammar.rule(FALSE).is("false");
  grammar.rule(NULL).is("null");

  // Possibly empty run of space, line feed, carriage return, tab or form feed.
  grammar.rule(WHITESPACE).is(grammar.regexp(whitespaceRegexp));

  return grammar.build();
}
/**
 * Creates the grammar builder: defines {@code IDENTIFIER_NAME}, delegates the remaining
 * definitions to {@code lexical}, and declares {@code SCRIPT} as the root rule.
 *
 * @return the configured builder (not yet built)
 */
public static LexerlessGrammarBuilder createGrammarBuilder() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  // An identifier name is spacing followed by the lexer's identifier pattern.
  builder.rule(IDENTIFIER_NAME).is(
    SPACING,
    builder.regexp(EcmaScriptLexer.IDENTIFIER));

  lexical(builder);

  builder.setRootRule(SCRIPT);
  return builder;
}
/**
 * 3.10. Literals
 *
 * <p>Declares the spacing, end-of-input, literal and identifier rules on the given builder.
 * Each literal rule and the identifier rule end with {@code SPACING}.
 *
 * @param b builder receiving the rule definitions
 */
private static void literals(LexerlessGrammarBuilder b) {
  final String floatRegexp = FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[fF]|[0-9][0-9_]*+[fF]";
  final String doubleRegexp = FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[dD]?+|[0-9][0-9_]*+[dD]";
  final String longRegexp = INTEGER_LITERAL_REGEXP + "[lL]";

  // Spacing: whitespace, then any number of (comment, whitespace) pairs.
  b.rule(SPACING).is(
    b.skippedTrivia(whitespace(b)),
    b.zeroOrMore(
      b.commentTrivia(
        b.firstOf(
          inlineComment(b),
          multilineComment(b))),
      b.skippedTrivia(whitespace(b))));

  b.rule(EOF).is(
    b.token(GenericTokenType.EOF, b.endOfInput()));

  // Character and string literals.
  b.rule(CHARACTER_LITERAL).is(characterLiteral(b), SPACING);
  b.rule(JavaTokenType.STRING_LITERAL).is(stringLiteral(b), SPACING);

  // Numeric literals.
  b.rule(FLOAT_LITERAL).is(b.regexp(floatRegexp), SPACING);
  b.rule(DOUBLE_LITERAL).is(b.regexp(doubleRegexp), SPACING);
  b.rule(LONG_LITERAL).is(b.regexp(longRegexp), SPACING);
  b.rule(INTEGER_LITERAL).is(b.regexp(INTEGER_LITERAL_REGEXP), SPACING);

  // Identifier: guarded by a lookahead that is either next(ENUM) or nextNot(KEYWORD).
  b.rule(JavaTokenType.IDENTIFIER).is(
    b.firstOf(
      b.next(ENUM),
      b.nextNot(KEYWORD)),
    javaIdentifier(b),
    SPACING);
}
public static LexerlessGrammarBuilder createGrammarBuilder() { LexerlessGrammarBuilder b = LexerlessGrammarBuilder.create(); b.rule(YES).is("yes"); b.rule(NO).is("no"); b.rule(DOCUMENT).is(PROLOG, ELEMENT, b.zeroOrMore(MISC)); b.rule(CHAR).is(b.regexp(CHAR_REGEXP)); b.rule(S).is(b.skippedTrivia(b.regexp(S_REGEXP))).skip(); b.rule(NAME).is(b.regexp(NAME_REGEXP)); b.rule(NM_TOKEN).is(b.regexp(NM_TOKEN_REGEXP)); b.rule(ENTITY_VALUE).is( b.firstOf( b.sequence('"', b.zeroOrMore(b.firstOf(b.regexp("[^%&\"]++"), PE_REFERENCE, REFERENCE)), '"'), b.sequence('\'', b.zeroOrMore(b.firstOf(b.regexp("[^%&\']++"), PE_REFERENCE, REFERENCE)), '\''))); b.rule(ATT_VALUE).is( b.firstOf( b.sequence('"', b.zeroOrMore(b.firstOf(b.regexp("[^<&\"]++"), REFERENCE)), '"'), b.sequence('\'', b.zeroOrMore(b.firstOf(b.regexp("[^<&\']++"), REFERENCE)), '\''))); b.rule(SYSTEM_LITERAL).is( b.firstOf( b.sequence('"', b.regexp("[^\"]" + "*+"), '"'), b.sequence('\'', b.regexp("[^']" + "*+"), '\''))); b.rule(PUBID_LITERAL).is( b.firstOf( b.sequence('"', b.regexp(PUBID_CHAR_REGEXP + "*+"), '"'), b.sequence('\'', b.regexp("(?:(?!')" + PUBID_CHAR_REGEXP + ")*+"), '\'')));
public static LexerlessGrammar createGrammar() { LexerlessGrammarBuilder b = LexerlessGrammarBuilder.create(); b.rule(WHITESPACE).is(b.regexp("[" + LINE_TERMINATOR_REGEXP + WHITESPACE_REGEXP + "]*+")); b.rule(SPACING).is( b.skippedTrivia(WHITESPACE), b.zeroOrMore( b.commentTrivia(b.regexp("(?:" + SINGLE_LINE_COMMENT_REGEXP + "|" + MULTI_LINE_COMMENT_REGEXP + ")")), b.skippedTrivia(WHITESPACE)) ).skip(); b.rule(SPACING_NO_LB).is(b.zeroOrMore(b.firstOf( b.skippedTrivia(b.regexp("[\\s&&[^\n\r]]++")), b.commentTrivia(b.regexp("(?:" + SINGLE_LINE_COMMENT_REGEXP + "|" + MULTI_LINE_COMMENT_NO_LB_REGEXP + ")")) ))).skip(); b.rule(NEXT_NOT_LB).is(b.nextNot(b.regexp("(?:" + "[\n\r]" + "|" + MULTI_LINE_COMMENT_REGEXP + ")"))).skip(); b.rule(EOS).is(b.firstOf( b.sequence(SPACING, ";"), b.sequence(SPACING_NO_LB, b.regexp(NEWLINE_REGEXP)), b.sequence(SPACING_NO_LB, b.next("}")), b.sequence(SPACING, b.endOfInput()) )); b.rule(EOS_NO_LB).is(b.firstOf( b.sequence(SPACING_NO_LB, ";"), b.sequence(SPACING_NO_LB, b.regexp(NEWLINE_REGEXP)), b.sequence(SPACING_NO_LB, b.next("}")), b.sequence(SPACING_NO_LB, b.endOfInput()) ));
public static LexerlessGrammarBuilder createGrammarBuilder() { LexerlessGrammarBuilder b = LexerlessGrammarBuilder.createBasedOn(CssGrammar.createGrammarBuilder()); b.rule(CssGrammar.STATEMENT).override(b.firstOf(VAR_DECLARATION, CssGrammar.AT_RULE, CssGrammar.RULESET)); b.rule(VAR_DECLARATION).is(VARIABLE, CssGrammar.COLON, CssGrammar.VALUE, CssGrammar.SEMICOLON); b.rule(VARIABLE).is(CssGrammar.addSpacing(b.sequence("$", CssGrammar.IDENT), b)); b.rule(EXTEND).is(CssGrammar.addSpacing("@extend", b), CssGrammar.addSpacing(CssGrammar.SELECTOR, b), b.optional(OPT)); b.rule(OPT).is("!optional"); b.rule(SUB_DECLARATION).is( b.firstOf( EXTEND, NESTED_PROPERTY, b.rule(CssGrammar.SUP_DECLARATION).override( SUB_DECLARATION, b.zeroOrMore( b.firstOf(CssGrammar.SEMICOLON, SUB_DECLARATION) )).skip(); b.rule(CssGrammar.PROPERTY).override(b.firstOf(CssGrammar.addSpacing(VARIABLE, b), CssGrammar.addSpacing(CssGrammar.IDENT, b))); b.rule(NESTED_PROPERTY).is( CssGrammar.IDENT, CssGrammar.COLON, b.optional(CssGrammar.VALUE), CssGrammar.BLOCK); b.rule(PARENT_SELECTOR).is(CssGrammar.addSpacing("&", b), b.zeroOrMore(CssGrammar.SUB_S)); b.rule(CssGrammar.SIMPLE_SELECTOR).override( b.firstOf(
private static void less(LexerlessGrammarBuilder b) { b.rule(LESS_VARIABLE_PREFIX).is(SPACING, b.token(GenericTokenType.LITERAL, "@")); b.rule(LESS_IDENT_INTERPOLATED_IDENTIFIER_NO_WS).is( b.token(GenericTokenType.LITERAL, b.sequence( b.optional(IDENT_IDENTIFIER_NO_WS), b.sequence(LESS_VARIABLE_PREFIX, OPEN_CURLY_BRACE, b.oneOrMore(_NMCHAR), CLOSE_CURLY_BRACE), b.zeroOrMore( b.firstOf( b.sequence(LESS_VARIABLE_PREFIX, OPEN_CURLY_BRACE, b.oneOrMore(_NMCHAR), CLOSE_CURLY_BRACE), _NMCHAR))))); b.rule(LESS_IDENT_IDENTIFIER_NO_WS_NOR_WHEN).is(b.token(GenericTokenType.LITERAL, b.sequence( b.nextNot(b.regexp("when[\\s]+|when\\(")), _IDENT))); b.rule(LESS_IDENT_INTERPOLATED_IDENTIFIER).is(SPACING, LESS_IDENT_INTERPOLATED_IDENTIFIER_NO_WS); b.rule(LESS_EXTEND_KEYWORD).is("extend"); b.rule(LESS_EXTEND_PREFIX).is(SPACING, b.token(GenericTokenType.LITERAL, b.sequence(":", LESS_EXTEND_KEYWORD))); b.rule(LESS_PARENT_SELECTOR_KEYWORD).is("&"); b.rule(LESS_MIXIN_GUARD_WHEN).is(SPACING, b.token(GenericTokenType.LITERAL, "when")); b.rule(LESS_MIXIN_GUARD_NOT).is(SPACING, b.token(GenericTokenType.LITERAL, "not")); b.rule(LESS_MIXIN_GUARD_AND).is(SPACING, b.token(GenericTokenType.LITERAL, "and")); b.rule(LESS_MIXIN_GUARD_OR).is(SPACING, b.token(GenericTokenType.LITERAL, ","));
/**
 * Creates the builder for a small arithmetic expression grammar with numbers, variables,
 * parentheses and the four operators {@code + - / *}.
 *
 * <p>Every token rule consumes the whitespace that follows it; {@code EXPRESSION} consumes
 * leading whitespace and requires the end of input. {@code EXPRESSION} is the root rule.
 *
 * @return the configured builder (not yet built)
 */
public static LexerlessGrammarBuilder createGrammarBuilder() {
  final String whitespaceRegexp = "\\s*+";
  final String numberRegexp = "[0-9]++";
  final String variableRegexp = "\\p{javaJavaIdentifierStart}++\\p{javaJavaIdentifierPart}*+";

  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  // Whitespace is registered as comment trivia and skipped.
  builder.rule(WHITESPACE)
    .is(builder.commentTrivia(builder.regexp(whitespaceRegexp)))
    .skip();

  // Operators.
  builder.rule(PLUS).is('+', WHITESPACE);
  builder.rule(MINUS).is('-', WHITESPACE);
  builder.rule(DIV).is('/', WHITESPACE);
  builder.rule(MUL).is('*', WHITESPACE);

  // Operands and parentheses.
  builder.rule(NUMBER).is(builder.regexp(numberRegexp), WHITESPACE);
  builder.rule(VARIABLE).is(builder.regexp(variableRegexp), WHITESPACE);
  builder.rule(LPAR).is('(', WHITESPACE);
  builder.rule(RPAR).is(')', WHITESPACE);

  // Expressions, from the outermost rule down to primaries.
  builder.rule(EXPRESSION).is(WHITESPACE, ADDITIVE_EXPRESSION, builder.endOfInput());
  builder.rule(ADDITIVE_EXPRESSION).is(
    MULTIPLICATIVE_EXPRESSION,
    builder.zeroOrMore(builder.firstOf(PLUS, MINUS), MULTIPLICATIVE_EXPRESSION));
  builder.rule(MULTIPLICATIVE_EXPRESSION)
    .is(PRIMARY, builder.zeroOrMore(builder.firstOf(DIV, MUL), PRIMARY))
    .skipIfOneChild();
  builder.rule(PRIMARY)
    .is(builder.firstOf(NUMBER, PARENS, VARIABLE))
    .skipIfOneChild();
  builder.rule(PARENS).is(LPAR, ADDITIVE_EXPRESSION, RPAR);

  builder.setRootRule(EXPRESSION);
  return builder;
}
private static void tokens(LexerlessGrammarBuilder b) { b.rule(IDENT).is(addSpacing(_IDENT, b)); b.rule(identNoWS).is(_IDENT); b.rule(AT_KEYWORD).is(addSpacing(b.sequence("@", IDENT), b)); b.rule(STRING).is(addSpacing(_STRING, b)); b.rule(BAD_STRING).is(_BAD_STRING); // TODO: do we need this? b.rule(BAD_URI).is(_BADURI); // TODO: do we need this? b.rule(BAD_COMMENT).is(_BAD_COMMENT); // TODO: do we need this? b.rule(HASH).is(addSpacing(b.sequence("#", _NAME), b)); b.rule(NUMBER).is(addSpacing(_NUM, b)); b.rule(PERCENTAGE).is(addSpacing(b.sequence(NUMBER, "%"), b)); b.rule(DIMENSION).is(addSpacing(b.sequence(NUMBER, unit), b)); b.rule(unit).is(b.firstOf("em", "ex", "ch", "rem", "vw", "vh", "vmin", "vmax", "cm", "mm", "in", "px", "pt", "pc")); b.rule(URI).is( addSpacing( b.firstOf(b.sequence("url(", _W, STRING, _W, CLOSE_PARENTHESIS), b .sequence("url(", _W, b.zeroOrMore(b.firstOf( b.regexp("[!#$%&*-\\[\\]-~]"), _NONASCII, _ESCAPE)), _W, CLOSE_PARENTHESIS)), b)); b.rule(UNICODE_RANGE) .is(addSpacing(b.regexp("u\\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?"), b)); b.rule(COLON).is(addSpacing(":", b)); b.rule(SEMICOLON).is(addSpacing(";", b)); b.rule(OPEN_CURLY_BRACE).is(addSpacing("{", b)); b.rule(CLOSE_CURLY_BRACE).is(addSpacing("}", b)); b.rule(OPEN_PARENTHESIS).is(addSpacing("(", b)); b.rule(CLOSE_PARENTHESIS).is(addSpacing(")", b)); b.rule(OPEN_BRACKET).is(addSpacing("[", b)); b.rule(CLOSE_BRACKET).is(addSpacing("]", b)); b.rule(comma).is(addSpacing(",", b));
private static void directives(LexerlessGrammarBuilder b) { b.rule(DIRECTIVE).is(b.firstOf( CONFIG_CONDITION, EMPTY_STATEMENT, STATEMENT, DEFAULT_XML_NAMESPACE_DIRECTIVE, b.sequence(ATTRIBUTES, /* No line break */ SPACING_NO_LB, NEXT_NOT_LB, ANNOTABLE_DIRECTIVE), b.sequence(INCLUDE_DIRECTIVE, /* No line break */ EOS_NO_LB), b.sequence(IMPORT_DIRECTIVE, /* No line break */ EOS_NO_LB), b.rule(CONFIG_CONDITION).is(IDENTIFIER, DOUBLE_COLON, IDENTIFIER, LCURLYBRACE, DIRECTIVES, RCURLYBRACE); b.rule(ANNOTABLE_DIRECTIVE).is(b.firstOf( VARIABLE_DECLARATION_STATEMENT, FUNCTION_DEF, b.rule(DIRECTIVES).is(b.zeroOrMore(DIRECTIVE)); b.rule(ATTRIBUTES).is(b.oneOrMore(ATTRIBUTE)); b.rule(ATTRIBUTE_COMBINATION).is(ATTRIBUTE, /* No line break */ SPACING_NO_LB, NEXT_NOT_LB, ATTRIBUTES); b.rule(ATTRIBUTE).is(b.firstOf( b.sequence(/* hack: */b.nextNot(NAMESPACE), ATTRIBUTE_EXPR), RESERVED_NAMESPACE, b.sequence(LBRAKET, ASSIGNMENT_EXPR, RBRAKET))); b.rule(ATTRIBUTE_EXPR).is(IDENTIFIER, b.zeroOrMore(PROPERTY_OPERATOR)); b.rule(IMPORT_DIRECTIVE).is(IMPORT, PACKAGE_NAME, b.optional(DOT, STAR)); b.rule(INCLUDE_DIRECTIVE).is(INCLUDE, /* No line break */ SPACING_NO_LB, NEXT_NOT_LB, STRING);
/**
 * Builds a grammar whose rule {@code A} is a zero-or-more repetition of an optional
 * {@code "foo"}.
 *
 * <p>NOTE(review): judging by the method name, this is meant to exercise a repetition whose
 * operand can succeed without consuming input — confirm against the callers.
 *
 * @return the grammar produced by the builder
 */
public static Grammar infiniteZeroOrMore() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();
  Object maybeFoo = builder.optional("foo");
  builder.rule(A).is(builder.zeroOrMore(maybeFoo));
  return builder.build();
}
/**
 * Prepares the benchmark input and two parse runners.
 *
 * <p>Reads the integer system properties {@code k} (default 5) and {@code n} (default 10).
 * The input is {@code " k" + (k - 1)} repeated {@code n} times. Both grammars share the same
 * root rule (zero or more of the {@code k} alternatives, then end of input) and differ only
 * in where optionality of the leading space lives:
 * <ul>
 *   <li>{@code required}: {@code spacing} is a mandatory space, wrapped in an optional by
 *       each alternative;</li>
 *   <li>{@code notRequired}: {@code spacing} itself is an optional space, used directly by
 *       each alternative.</li>
 * </ul>
 */
@Setup
public void setup() {
  final int k = Integer.getInteger("k", 5);
  final int n = Integer.getInteger("n", 10);
  input = Strings.repeat(" k" + (k - 1), n).toCharArray();

  GrammarRuleKey root = newRuleKey();
  GrammarRuleKey spacing = newRuleKey();
  GrammarRuleKey[] rules = new GrammarRuleKey[k];
  for (int index = 0; index < rules.length; index++) {
    rules[index] = newRuleKey();
  }

  // The first two alternatives are read before the remainder is copied, so a too-small k
  // fails on the array access exactly as it does when everything is written inline.
  GrammarRuleKey firstAlternative = rules[0];
  GrammarRuleKey secondAlternative = rules[1];
  GrammarRuleKey[] otherAlternatives = Arrays.copyOfRange(rules, 2, rules.length);

  // Variant 1: mandatory spacing rule, made optional at each use site.
  LexerlessGrammarBuilder requiredBuilder = LexerlessGrammarBuilder.create();
  requiredBuilder.rule(root).is(
    requiredBuilder.zeroOrMore(
      requiredBuilder.firstOf(firstAlternative, secondAlternative, otherAlternatives)),
    requiredBuilder.endOfInput());
  requiredBuilder.rule(spacing).is(" ");
  for (int index = 0; index < k; index++) {
    requiredBuilder.rule(rules[index]).is(requiredBuilder.optional(spacing), "k" + index);
  }
  required = new ParseRunner(requiredBuilder.build().rule(root));

  // Variant 2: spacing rule that is optional by itself.
  LexerlessGrammarBuilder notRequiredBuilder = LexerlessGrammarBuilder.create();
  notRequiredBuilder.rule(root).is(
    notRequiredBuilder.zeroOrMore(
      notRequiredBuilder.firstOf(firstAlternative, secondAlternative, otherAlternatives)),
    notRequiredBuilder.endOfInput());
  notRequiredBuilder.rule(spacing).is(notRequiredBuilder.optional(" "));
  for (int index = 0; index < k; index++) {
    notRequiredBuilder.rule(rules[index]).is(spacing, "k" + index);
  }
  notRequired = new ParseRunner(notRequiredBuilder.build().rule(root));
}
/**
 * Creates the grammar builder by registering punctuators, keywords and literals, in that
 * order, and declaring {@code COMPILATION_UNIT} as the root rule.
 *
 * @return the configured builder (not yet built)
 */
public static LexerlessGrammarBuilder createGrammarBuilder() {
  LexerlessGrammarBuilder builder = LexerlessGrammarBuilder.create();

  // Lexical layers.
  punctuators(builder);
  keywords(builder);
  literals(builder);

  builder.setRootRule(COMPILATION_UNIT);
  return builder;
}
/**
 * Defines the {@code SPACING} rule: skipped whitespace, followed by any number of
 * (comment, skipped whitespace) pairs.
 *
 * @param b builder receiving the rule definition
 * @param commentRegex regular expression used to recognise a comment
 */
private static void spacing(LexerlessGrammarBuilder b, String commentRegex) {
  // Possibly empty whitespace run; the look-behind refuses to start right after a backslash.
  final String whitespaceRegex = "(?<!\\\\)[\\s]*+";

  b.rule(SPACING).is(
    b.skippedTrivia(b.regexp(whitespaceRegex)),
    b.zeroOrMore(
      b.commentTrivia(b.regexp(commentRegex)),
      b.skippedTrivia(b.regexp(whitespaceRegex))));
}
private static void keywords(LexerlessGrammarBuilder b) { Object[] rest = new Object[PHPKeyword.values().length - 2]; for (int i = 0; i < PHPKeyword.values().length; i++) { PHPKeyword tokenType = PHPKeyword.values()[i]; // PHP keywords are case insensitive b.rule(tokenType).is(SPACING, b.regexp("(?i)" + tokenType.getValue()), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART))).skip(); if (i > 1) { rest[i - 2] = b.regexp("(?i)" + tokenType.getValue()); } } b.rule(KEYWORDS).is(SPACING, b.firstOf( PHPKeyword.getKeywordValues()[0], PHPKeyword.getKeywordValues()[1], rest), b.nextNot(b.regexp(LexicalConstant.IDENTIFIER_PART)) ); }