public FlexRegularExpressionLiteralChannel() { this.delegate = regexp(FlexTokenType.REGULAR_EXPRESSION_LITERAL, "/([^/\\n\\\\]*+(\\\\.)?+)*+/\\p{javaJavaIdentifierPart}*+"); }
/**
 * Creates a {@link RegexpChannel} for the given token type from a pattern
 * supplied in pieces.
 *
 * @param type token type handed to the new channel
 * @param regexpPiece pattern fragments, combined into one expression by {@code merge}
 * @return the newly created channel
 */
public static RegexpChannel regexp(TokenType type, String... regexpPiece) {
  final String pattern = merge(regexpPiece);
  return new RegexpChannel(type, pattern);
}
public static Lexer create(SwiftConfiguration conf) { return Lexer.builder() .withCharset(conf.getCharset()) .withFailIfNoChannelToConsumeOneCharacter(false) // Comments .withChannel(commentRegexp("//[^\\n\\r]*+")) .withChannel(commentRegexp("/\\*[\\s\\S]*?\\*/")) // All other tokens .withChannel(regexp(LITERAL, "[^\r\n\\s/]+")) .withChannel(new BlackHoleChannel("[\\s]")) .build(); } }
.withChannel(commentRegexp("#[^\\n\\r]*+")) .withChannel(regexp(PythonTokenType.STRING, BYTES_PREFIX + "\'([^\'\\\\]*+(\\\\[\\s\\S])?+)*+\'")) .withChannel(regexp(PythonTokenType.STRING, BYTES_PREFIX + "\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"")) .withChannel(regexp(PythonTokenType.STRING, FORMATTED_STRING_PREFIX + "\'([^\'\\\\]*+(\\\\[\\s\\S])?+)*+\'")) .withChannel(regexp(PythonTokenType.STRING, FORMATTED_STRING_PREFIX + "\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++(_?[0-9])*+\\.[0-9]*+(_?[0-9])*+" + EXP + "?+" + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "\\.[0-9]++(_?[0-9])*+" + EXP + "?+" + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++(_?[0-9])*+" + EXP + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++(_?[0-9])*+" + IMAGINARY_SUFFIX)) .withChannel(regexp(PythonTokenType.NUMBER, "0[oO]?+(_?[0-7])++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0[xX](_?[0-9a-fA-F])++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0[bB](_?[01])++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[1-9](_?[0-9])*+" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0(_?0)*+" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(new IdentifierAndKeywordChannel(and("[a-zA-Z_]", o2n("\\w")), true, PythonKeyword.values()))
.withChannel(commentRegexp("//", o2n("[^\\n\\r]"))) .withChannel(commentRegexp("/\\*", ANY_CHAR + "*?", "\\*/")) .withChannel(regexp(CSharpTokenType.STRING_LITERAL, "\"", o2n(or("\\\\.", anyButNot("\"", "\\n", "\\r"))), "\"")) .withChannel(regexp(CSharpTokenType.STRING_LITERAL, "@\"", o2n(or("\"\"", anyButNot("\""))), "\"")) .withChannel(regexp(CSharpTokenType.CHARACTER_LITERAL, "'", one2n(or("\\\\.", anyButNot("'", "\\n", "\\r"))), "'")) .withChannel(regexp(CSharpTokenType.REAL_LITERAL, o2n(DIGIT), "\\.", one2n(DIGIT), opt(EXP), opt(REAL_SUFFIX))) .withChannel(regexp(CSharpTokenType.REAL_LITERAL, one2n(DIGIT), EXP, opt(REAL_SUFFIX))) .withChannel(regexp(CSharpTokenType.REAL_LITERAL, one2n(DIGIT), REAL_SUFFIX)) .withChannel(regexp(CSharpTokenType.INTEGER_HEX_LITERAL, "0[xX]", one2n(HEXA_DIGIT), opt(INT_SUFFIX))) .withChannel(regexp(CSharpTokenType.INTEGER_DEC_LITERAL, one2n(DIGIT), opt(INT_SUFFIX))) .withChannel(new IdentifierAndKeywordChannel(g(opt("@"), or(LETTER_CHAR, "_"), o2n(or(LETTER_CHAR, DECIMAL_DIGIT_CHAR, CONNECTING_CHAR, COMBINING_CHAR, FORMATTING_CHAR))), true, CSharpKeyword.values())) .withChannel(new PunctuatorChannel(CSharpPunctuator.values())) .withChannel(regexp(CSharpTokenType.PREPROCESSOR, "#[^\\r\\n]*"))
/**
 * Builds the PL/SQL lexer for the given configuration.
 *
 * <p>The channels are registered in the order below; the builder is configured
 * with {@code withFailIfNoChannelToConsumeOneCharacter(true)} and the last
 * channel registered is an {@code UnknownCharacterChannel}.
 *
 * @param conf configuration supplying the charset
 * @return the configured lexer
 */
public static Lexer create(PlSqlConfiguration conf) {
  Lexer.Builder builder = Lexer.builder();
  builder.withCharset(conf.getCharset());
  builder.withFailIfNoChannelToConsumeOneCharacter(true);

  // Whitespace that is not directly followed by "&" is dropped.
  builder.withChannel(new BlackHoleChannel("\\s(?!&)"));
  builder.withChannel(commentRegexp(COMMENT));

  // Literals
  builder.withChannel(regexp(PlSqlTokenType.SCIENTIFIC_LITERAL, SCIENTIFIC_LITERAL));
  builder.withChannel(regexp(PlSqlTokenType.REAL_LITERAL, REAL_LITERAL));
  builder.withChannel(regexp(PlSqlTokenType.INTEGER_LITERAL, INTEGER_LITERAL));
  builder.withChannel(regexp(PlSqlTokenType.STRING_LITERAL, STRING_LITERAL));
  builder.withChannel(regexp(PlSqlTokenType.DATE_LITERAL, DATE_LITERAL));

  // Identifiers, keywords and punctuators
  builder.withChannel(
      new IdentifierAndKeywordChannel(or(SIMPLE_IDENTIFIER, QUOTED_IDENTIFIER), false, PlSqlKeyword.values()));
  builder.withChannel(new RegexPunctuatorChannel(PlSqlPunctuator.values()));

  // A whitespace character, "&" or "&&", and a simple identifier are dropped together.
  builder.withChannel(new BlackHoleChannel(and("\\s&&?", SIMPLE_IDENTIFIER)));
  builder.withChannel(new UnknownCharacterChannel());
  return builder.build();
}
}
public static Lexer create(ObjectiveCConfiguration conf) { return Lexer.builder() .withCharset(conf.getCharset()) .withFailIfNoChannelToConsumeOneCharacter(false) // Comments .withChannel(commentRegexp("//[^\\n\\r]*+")) .withChannel(commentRegexp("/\\*[\\s\\S]*?\\*/")) // All other tokens .withChannel(regexp(LITERAL, "[^\r\n\\s/]+")) .withChannel(new BlackHoleChannel("[\\s]")) .build(); }
.withChannel(commentRegexp("#[^\\n\\r]*+")) .withChannel(regexp(PythonTokenType.STRING, BYTES_PREFIX + "\'([^\'\\\\]*+(\\\\[\\s\\S])?+)*+\'")) .withChannel(regexp(PythonTokenType.STRING, BYTES_PREFIX + "\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++\\.[0-9]*+" + EXP + "?+" + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "\\.[0-9]++" + EXP + "?+" + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++" + EXP + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++" + IMAGINARY_SUFFIX)) .withChannel(regexp(PythonTokenType.NUMBER, "0[oO]?+[0-7]++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0[xX][0-9a-fA-F]++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0[bB][01]++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[1-9][0-9]*+" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(new IdentifierAndKeywordChannel(and("[a-zA-Z_]", o2n("\\w")), true, PythonKeyword.values()))
public static Lexer create(ObjectiveCConfiguration conf) { return Lexer.builder() .withCharset(conf.getCharset()) .withFailIfNoChannelToConsumeOneCharacter(false) // Comments .withChannel(commentRegexp("//[^\\n\\r]*+")) .withChannel(commentRegexp("/\\*[\\s\\S]*?\\*/")) // All other tokens .withChannel(regexp(LITERAL, "[^\r\n\\s/]+")) .withChannel(new BlackHoleChannel("[\\s]")) .build(); }
.withChannel(commentRegexp("#[^\\n\\r]*+")) .withChannel(regexp(PythonTokenType.STRING, BYTES_PREFIX + "\'([^\'\\\\]*+(\\\\[\\s\\S])?+)*+\'")) .withChannel(regexp(PythonTokenType.STRING, BYTES_PREFIX + "\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"")) .withChannel(regexp(PythonTokenType.STRING, FORMATTED_STRING_PREFIX + "\'([^\'\\\\]*+(\\\\[\\s\\S])?+)*+\'")) .withChannel(regexp(PythonTokenType.STRING, FORMATTED_STRING_PREFIX + "\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++(_?[0-9])*+\\.[0-9]*+(_?[0-9])*+" + EXP + "?+" + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "\\.[0-9]++(_?[0-9])*+" + EXP + "?+" + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++(_?[0-9])*+" + EXP + IMAGINARY_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[0-9]++(_?[0-9])*+" + IMAGINARY_SUFFIX)) .withChannel(regexp(PythonTokenType.NUMBER, "0[oO]?+(_?[0-7])++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0[xX](_?[0-9a-fA-F])++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0[bB](_?[01])++" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "[1-9](_?[0-9])*+" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(regexp(PythonTokenType.NUMBER, "0(_?0)*+" + LONG_INTEGER_SUFFIX + "?+")) .withChannel(new IdentifierAndKeywordChannel(and("[a-zA-Z_]", o2n("\\w")), true, PythonKeyword.values()))
/**
 * Initialises the delegate with a regexp channel built from
 * {@code REGULAR_EXPRESSION} and typed {@code REGULAR_EXPRESSION_LITERAL}.
 */
public EcmaScriptRegexpChannel() {
  delegate = regexp(REGULAR_EXPRESSION_LITERAL, REGULAR_EXPRESSION);
}
/**
 * Creates a {@link CommentRegexpChannel} from a pattern supplied in pieces.
 *
 * @param regexpPiece pattern fragments, combined into one expression by {@code merge}
 * @return the newly created comment channel
 */
public static CommentRegexpChannel commentRegexp(String... regexpPiece) {
  final String pattern = merge(regexpPiece);
  return new CommentRegexpChannel(pattern);
}
public static Lexer create(Charset charset) { return Lexer.builder() .withCharset(charset) .withFailIfNoChannelToConsumeOneCharacter(true) .withChannel(new BomCharacterChannel()) .withChannel(new BlackHoleChannel("\\s++")) .withChannel(regexp(HtmlTokenType.DOCTYPE, "<!DOCTYPE.*>")) .withChannel(regexp(HtmlTokenType.TAG, "</?[:\\w]+>?")) .withChannel(regexp(HtmlTokenType.TAG, "/?>")) // JSP comment .withChannel(commentRegexp("<%--[\\w\\W]*?%>")) // HTML comment .withChannel(commentRegexp("<!--[\\w\\W]*?-->")) // C comment .withChannel(commentRegexp("/\\*[\\w\\W]*?\\*/")) // CPP comment .withChannel(commentRegexp("//[^\n\r]*")) .withChannel(regexp(HtmlTokenType.EXPRESSION, "<%[\\w\\W]*?%>")) .withChannel(regexp(HtmlTokenType.ATTRIBUTE, "=[\"']{1}[\\w\\W]*?[\"']{1}")) .withChannel(regexp(HtmlTokenType.ATTRIBUTE, "=[^\\s'\"=<>`]++")) .withChannel(new IdentifierAndKeywordChannel("\\w++", true, new TokenType[]{})) .withChannel(new UnknownCharacterChannel()) .build(); }
/**
 * Builds the Perl lexer for the given configuration.
 *
 * <p>Channel order matters: each channel is registered in the order it should
 * be tried. The last channel registered is an {@code UnknownCharacterChannel}.
 *
 * @param config configuration supplying the charset
 * @return the configured lexer
 */
public static Lexer create(PerlConfiguration config) {
  return Lexer.builder()
      .withCharset(config.getCharset())
      .withFailIfNoChannelToConsumeOneCharacter(true)
      .withChannel(new NewLineChannel())
      .withChannel(new BlackHoleChannel("\\s"))
      .withChannel(commentRegexp("#[^\\n\\r]*+"))
      .withChannel(new PODChannel())
      // Everything after an __END__ / __DATA__ marker line is consumed as one
      // comment. "[\s\S]" is used because "." does not match line terminators,
      // so ".*+" only covered the first line after the marker.
      .withChannel(commentRegexp("__END__[\\n\\r][\\s\\S]*+"))
      .withChannel(commentRegexp("__DATA__[\\n\\r][\\s\\S]*+"))
      .withChannel(new StringLiteralsChannel())
      .withChannel(new QuoteLikeChannel())
      .withChannel(new HeredocChannel())
      // Numbers, most specific form first.
      // NOTE(review): none of these patterns matches a decimal with a leading
      // zero such as "0.5" as a single token - confirm whether that is intended.
      .withChannel(regexp(PerlTokenType.NUMBER, "[1-9][0-9]*+[.][0-9]++" + EXP))
      .withChannel(regexp(PerlTokenType.NUMBER, "[1-9][0-9]*+[.][0-9]++"))
      .withChannel(regexp(PerlTokenType.NUMBER, "[1-9][0-9]*+" + EXP))
      .withChannel(regexp(PerlTokenType.NUMBER, "[1-9][0-9]*+"))
      .withChannel(regexp(PerlTokenType.NUMBER, "0++"))
      // Identifiers may start with one of the characters $ % & @
      .withChannel(new IdentifierAndKeywordChannel(and("[$%&@]?[a-zA-Z_]", o2n("\\w")), true, PerlKeyword.values()))
      .withChannel(new PunctuatorChannel(PerlPunctuator.values()))
      .withChannel(new UnknownCharacterChannel())
      .build();
}
}
/**
 * Creates a {@link RegexpChannel} for the given token type from a pattern
 * supplied in pieces.
 *
 * @param type token type handed to the new channel
 * @param regexpPiece pattern fragments, combined into one expression by {@code merge}
 * @return the newly created channel
 */
public static RegexpChannel regexp(TokenType type, String... regexpPiece) {
  final String pattern = merge(regexpPiece);
  return new RegexpChannel(type, pattern);
}
/**
 * Creates the lexer holding every channel used to analyze the Apex language.
 *
 * <p>Channels are registered in this order: single-line comments, multi-line
 * comments, decimal and hexadecimal floating point literals, integer literals,
 * strings, identifiers/keywords, punctuators and finally the black hole channel.
 *
 * @param config apex configuration.
 * @return a lexer instance.
 */
public static Lexer create(ApexConfiguration config) {
  Lexer.Builder builder = Lexer.builder();
  builder.withCharset(config.getCharset());
  builder.withFailIfNoChannelToConsumeOneCharacter(Boolean.TRUE);

  // Comments
  builder.withChannel(commentRegexp(SINGLE_LINE_PATTERN));
  builder.withChannel(commentRegexp(MULTI_LINE_PATTERN));

  // Literals
  builder.withChannel(
      regexp(ApexTokenType.DECIMAL_FLOATING_POINT_LITERAL, DECIMAL_FLOATING_POINT_LITERAL_PATTERN));
  builder.withChannel(
      regexp(ApexTokenType.HEXADECIMAL_FLOATING_POINT_LITERAL, HEXADECIMAL_FLOATING_POINT_LITERAL_PATTERN));
  builder.withChannel(regexp(ApexTokenType.INTEGER_LITERAL, INTEGER_LITERAL_PATTERN));
  builder.withChannel(regexp(ApexTokenType.STRING, STRING_PATTERN));

  // Identifiers, keywords and punctuators
  builder.withChannel(new IdentifierAndKeywordChannel(IDENTIFIER_PATTERN, Boolean.FALSE, ApexKeyword.values()));
  builder.withChannel(new PunctuatorChannel(ApexPunctuator.values()));

  builder.withChannel(new BlackHoleChannel(BLACK_HOLE));
  return builder.build();
}
}
/**
 * Creates a {@link RegexpChannel} for the given token type from a pattern
 * supplied in pieces.
 *
 * @param type token type handed to the new channel
 * @param regexpPiece pattern fragments, combined into one expression by {@code merge}
 * @return the newly created channel
 */
public static RegexpChannel regexp(TokenType type, String... regexpPiece) {
  final String pattern = merge(regexpPiece);
  return new RegexpChannel(type, pattern);
}
public static Lexer create(FlexConfiguration conf) { return Lexer.builder() .withCharset(conf.getCharset()) .withFailIfNoChannelToConsumeOneCharacter(true) .withChannel(new BomCharacterChannel()) .withChannel(new BlackHoleChannel("\\s++")) // Comments .withChannel(commentRegexp("//[^\\n\\r]*+")) .withChannel(commentRegexp("/\\*[\\s\\S]*?\\*/")) // String Literals .withChannel(regexp(GenericTokenType.LITERAL, "\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"")) .withChannel(regexp(GenericTokenType.LITERAL, "\'([^\'\\\\]*+(\\\\[\\s\\S])?+)*+\'")) // Regular Expression Literal .withChannel(new FlexRegularExpressionLiteralChannel()) // Numbers .withChannel(regexp(FlexTokenType.NUMERIC_LITERAL, "0[xX][0-9a-fA-F]++")) .withChannel(regexp(FlexTokenType.NUMERIC_LITERAL, "[0-9]++\\.([0-9]++)?+" + EXP + "?+")) .withChannel(regexp(FlexTokenType.NUMERIC_LITERAL, "\\.[0-9]++" + EXP + "?+")) .withChannel(regexp(FlexTokenType.NUMERIC_LITERAL, "[0-9]++" + EXP + "?+")) .withChannel(new IdentifierAndKeywordChannel("\\p{javaJavaIdentifierStart}++\\p{javaJavaIdentifierPart}*+", true, FlexKeyword.values())) .withChannel(new PunctuatorChannel(FlexPunctuator.values())) .withChannel(new UnknownCharacterChannel()) .build(); }
/**
 * Creates a {@link CommentRegexpChannel} from a pattern supplied in pieces.
 *
 * @param regexpPiece pattern fragments, combined into one expression by {@code merge}
 * @return the newly created comment channel
 */
public static CommentRegexpChannel commentRegexp(String... regexpPiece) {
  final String pattern = merge(regexpPiece);
  return new CommentRegexpChannel(pattern);
}
public static Lexer create(ScalaConfiguration conf) { return Lexer.builder() .withCharset(conf.getCharset()) .withFailIfNoChannelToConsumeOneCharacter(true) // Channels, which consumes more frequently should come first. // Whitespace character occurs more frequently than any other, and thus come first: .withChannel(new BlackHoleChannel("[" + LINE_TERMINATOR + WHITESPACE + "]++")) // Comments .withChannel(commentRegexp(COMMENT)) // String Literals .withChannel(regexp(GenericTokenType.LITERAL, LITERAL)) .withChannel(regexp(ScalaTokenType.ANNOTATION, ANNOTATION)) // Regular Expression Literals .withChannel(regexp(ScalaTokenType.NUMERIC_LITERAL, NUMERIC_LITERAL)) .withChannel(new IdentifierAndKeywordChannel(IDENTIFIER, true, ScalaKeyword.values())) .withChannel(new PunctuatorChannel(ScalaPunctuator.values())) .withChannel(new UnknownCharacterChannel()) .build(); }