private boolean processNewLine(CodeReader code, Lexer output) { if (output.getTokens().isEmpty() || (output.getTokens().get(output.getTokens().size() - 1).getType().equals(PerlTokenType.NEWLINE))) { // Blank line consumeEOL(code); return true; } // NEWLINE token output.addToken(Token.builder() .setLine(code.getLinePosition()) .setColumn(code.getColumnPosition()) .setURI(output.getURI()) .setType(PerlTokenType.NEWLINE) .setValueAndOriginalValue("\n") .setGeneratedCode(true) .build()); consumeEOL(code); return false; }
public void addToken(Token... tokens) { if (tokens.length <= 0) { throw new IllegalArgumentException("at least one token must be given"); } Token firstToken = tokens[0]; Token firstTokenWithTrivia; // Performance optimization: no need to rebuild token, if there is no trivia if (trivia.isEmpty() && !firstToken.hasTrivia()) { firstTokenWithTrivia = firstToken; } else { firstTokenWithTrivia = Token.builder(firstToken).setTrivia(trivia).build(); trivia.clear(); } this.tokens.add(firstTokenWithTrivia); if (tokens.length > 1) { this.tokens.addAll(Arrays.asList(tokens).subList(1, tokens.length)); } }
} else if (ruleMatcher.getTriviaKind() == TriviaKind.COMMENT) { updateTokenPositionAndValue(node); tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); tokenBuilder.setType(GenericTokenType.COMMENT); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; } else { updateTokenPositionAndValue(node); TokenExpression ruleMatcher = (TokenExpression) node.getMatcher(); tokenBuilder.setType(ruleMatcher.getTokenType()); if (ruleMatcher.getTokenType() == GenericTokenType.COMMENT) { tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; tokenBuilder.setType(UNDEFINED_TOKEN_TYPE); Token token = tokenBuilder.setTrivia(trivias).build(); trivias.clear(); AstNode astNode = new AstNode(token);
} else if (ruleMatcher.getTriviaKind() == TriviaKind.COMMENT) { updateTokenPositionAndValue(node); tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); tokenBuilder.setType(GenericTokenType.COMMENT); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; } else { updateTokenPositionAndValue(node); TokenExpression ruleMatcher = (TokenExpression) node.getMatcher(); tokenBuilder.setType(ruleMatcher.getTokenType()); if (ruleMatcher.getTokenType() == GenericTokenType.COMMENT) { tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; tokenBuilder.setType(UNDEFINED_TOKEN_TYPE); Token token = tokenBuilder.setTrivia(trivias).build(); trivias.clear(); AstNode astNode = new AstNode(token);
} else if (ruleMatcher.getTriviaKind() == TriviaKind.COMMENT) { updateTokenPositionAndValue(node); tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); tokenBuilder.setType(GenericTokenType.COMMENT); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; } else { updateTokenPositionAndValue(node); TokenExpression ruleMatcher = (TokenExpression) node.getMatcher(); tokenBuilder.setType(ruleMatcher.getTokenType()); if (ruleMatcher.getTokenType() == GenericTokenType.COMMENT) { tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; tokenBuilder.setType(UNDEFINED_TOKEN_TYPE); Token token = tokenBuilder.setTrivia(trivias).build(); trivias.clear(); AstNode astNode = new AstNode(token);
} else if (ruleMatcher.getTriviaKind() == TriviaKind.COMMENT) { updateTokenPositionAndValue(node); tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); tokenBuilder.setType(GenericTokenType.COMMENT); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; } else { updateTokenPositionAndValue(node); TokenExpression ruleMatcher = (TokenExpression) node.getMatcher(); tokenBuilder.setType(ruleMatcher.getTokenType()); if (ruleMatcher.getTokenType() == GenericTokenType.COMMENT) { tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; tokenBuilder.setType(UNDEFINED_TOKEN_TYPE); Token token = tokenBuilder.setTrivia(trivias).build(); trivias.clear(); AstNode astNode = new AstNode(token);
} else if (ruleMatcher.getTriviaKind() == TriviaKind.COMMENT) { updateTokenPositionAndValue(node); tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); tokenBuilder.setType(GenericTokenType.COMMENT); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; } else { updateTokenPositionAndValue(node); TokenExpression ruleMatcher = (TokenExpression) node.getMatcher(); tokenBuilder.setType(ruleMatcher.getTokenType()); if (ruleMatcher.getTokenType() == GenericTokenType.COMMENT) { tokenBuilder.setTrivia(Collections.<Trivia>emptyList()); trivias.add(Trivia.createComment(tokenBuilder.build())); return null; tokenBuilder.setType(UNDEFINED_TOKEN_TYPE); Token token = tokenBuilder.setTrivia(trivias).build(); trivias.clear(); AstNode astNode = new AstNode(token);
/**
 * Attempts to read a comment at the reader's current position.
 *
 * <p>When {@code popTo} reports more than zero consumed characters, the text is
 * wrapped in a {@code COMMENT} token located at the reader's previous cursor
 * and registered on the lexer as comment trivia. The scratch buffer is emptied
 * afterwards.
 *
 * @param code  reader to consume from
 * @param lexer lexer receiving the comment trivia
 * @return {@code true} if a comment was consumed, {@code false} otherwise
 * @throws LexerException if evaluating the regular expression overflows the stack
 */
@Override
public boolean consume(CodeReader code, Lexer lexer) {
  try {
    if (code.popTo(matcher, tmpBuilder) <= 0) {
      return false;
    }

    String commentText = tmpBuilder.toString();
    int line = code.getPreviousCursor().getLine();
    int column = code.getPreviousCursor().getColumn();

    Token commentToken = tokenBuilder
      .setType(COMMENT)
      .setValueAndOriginalValue(commentText)
      .setURI(lexer.getURI())
      .setLine(line)
      .setColumn(column)
      .build();
    lexer.addTrivia(Trivia.createComment(commentToken));

    // Empty the scratch buffer so it is clean for the next call.
    tmpBuilder.delete(0, tmpBuilder.length());
    return true;
  } catch (StackOverflowError e) {
    throw new LexerException(
      "The regular expression " + regexp + " has led to a stack overflow error. "
        + "This error is certainly due to an inefficient use of alternations. See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5050507",
      e);
  }
}
}
/**
 * Splits {@code sourceCode} into mock tokens.
 *
 * <p>Runs of letters, digits, underscores and the characters {@code + - * /}
 * form a single token; the exact text {@code EOF} yields an {@code EOF} token,
 * anything else an {@code IDENTIFIER}. Whitespace characters are skipped. Any
 * other single character becomes its own {@code IDENTIFIER} token. Each token
 * records the line and column the reader reported before it was consumed.
 *
 * @param sourceCode text to tokenize
 * @return the tokens in source order
 */
public static List<Token> lex(String sourceCode) {
  List<Token> tokens = new ArrayList<>();
  CodeReader reader = new CodeReader(sourceCode);
  Matcher matcher = Pattern.compile("[a-zA-Z_0-9\\+\\-\\*/]+").matcher("");

  while (reader.peek() != -1) {
    int line = reader.getLinePosition();
    int column = reader.getColumnPosition();
    StringBuilder buffer = new StringBuilder();

    if (reader.popTo(matcher, buffer) == -1) {
      // No word-like run here: either skip whitespace or emit the lone character.
      if (Character.isWhitespace(reader.peek())) {
        reader.pop();
        continue;
      }
      tokens.add(mockTokenBuilder(IDENTIFIER, Character.toString((char) reader.pop()))
        .setLine(line)
        .setColumn(column)
        .build());
      continue;
    }

    String text = buffer.toString();
    Token token;
    if ("EOF".equals(text)) {
      token = mockTokenBuilder(EOF, text).setLine(line).setColumn(column).build();
    } else {
      token = mockTokenBuilder(IDENTIFIER, text).setLine(line).setColumn(column).build();
    }
    tokens.add(token);
  }

  return tokens;
}
/**
 * Tries each registered matcher, in the iteration order of {@code tokenMatchers},
 * against the reader's current position.
 *
 * <p>The first matcher for which {@code popTo} reports more than zero consumed
 * characters wins: a token of the associated type is built at the reader's
 * previous cursor, added to the lexer, and the scratch buffer is emptied.
 *
 * @param code  reader to consume from
 * @param lexer lexer receiving the token
 * @return {@code true} if some matcher consumed input, {@code false} if none did
 * @throws LexerException if evaluating a matcher overflows the stack
 */
@Override
public boolean consume(CodeReader code, Lexer lexer) {
  for (Map.Entry<TokenType, Matcher> entry : tokenMatchers.entrySet()) {
    TokenType candidateType = entry.getKey();
    Matcher candidateMatcher = entry.getValue();
    try {
      if (code.popTo(candidateMatcher, tmpBuilder) <= 0) {
        continue;
      }

      String text = tmpBuilder.toString();
      int line = code.getPreviousCursor().getLine();
      int column = code.getPreviousCursor().getColumn();

      Token matched = tokenBuilder
        .setType(candidateType)
        .setValueAndOriginalValue(text)
        .setURI(lexer.getURI())
        .setLine(line)
        .setColumn(column)
        .build();
      lexer.addToken(matched);

      // Empty the scratch buffer so it is clean for the next call.
      tmpBuilder.delete(0, tmpBuilder.length());
      return true;
    } catch (StackOverflowError e) {
      throw new LexerException("The regular expression " + candidateType.getValue() + " has led to a stack overflow error.", e);
    }
  }
  return false;
}
}
/**
 * Attempts to read a comment at the reader's current position.
 *
 * <p>When {@code popTo} reports more than zero consumed characters, the text is
 * wrapped in a {@code COMMENT} token located at the reader's previous cursor
 * and registered on the lexer as comment trivia. The scratch buffer is emptied
 * afterwards.
 *
 * @param code  reader to consume from
 * @param lexer lexer receiving the comment trivia
 * @return {@code true} if a comment was consumed, {@code false} otherwise
 * @throws LexerException if evaluating the regular expression overflows the stack
 */
@Override
public boolean consume(CodeReader code, Lexer lexer) {
  try {
    if (code.popTo(matcher, tmpBuilder) <= 0) {
      return false;
    }

    String commentText = tmpBuilder.toString();
    int line = code.getPreviousCursor().getLine();
    int column = code.getPreviousCursor().getColumn();

    Token commentToken = tokenBuilder
      .setType(COMMENT)
      .setValueAndOriginalValue(commentText)
      .setURI(lexer.getURI())
      .setLine(line)
      .setColumn(column)
      .build();
    lexer.addTrivia(Trivia.createComment(commentToken));

    // Empty the scratch buffer so it is clean for the next call.
    tmpBuilder.delete(0, tmpBuilder.length());
    return true;
  } catch (StackOverflowError e) {
    throw new LexerException(
      "The regular expression " + regexp + " has led to a stack overflow error. "
        + "This error is certainly due to an inefficient use of alternations. See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5050507",
      e);
  }
}
}
/**
 * Attempts to read a comment at the reader's current position.
 *
 * <p>When {@code popTo} reports more than zero consumed characters, the text is
 * wrapped in a {@code COMMENT} token located at the reader's previous cursor
 * and registered on the lexer as comment trivia. The scratch buffer is emptied
 * afterwards.
 *
 * @param code  reader to consume from
 * @param lexer lexer receiving the comment trivia
 * @return {@code true} if a comment was consumed, {@code false} otherwise
 * @throws LexerException if evaluating the regular expression overflows the stack
 */
@Override
public boolean consume(CodeReader code, Lexer lexer) {
  try {
    if (code.popTo(matcher, tmpBuilder) <= 0) {
      return false;
    }

    String commentText = tmpBuilder.toString();
    int line = code.getPreviousCursor().getLine();
    int column = code.getPreviousCursor().getColumn();

    Token commentToken = tokenBuilder
      .setType(COMMENT)
      .setValueAndOriginalValue(commentText)
      .setURI(lexer.getURI())
      .setLine(line)
      .setColumn(column)
      .build();
    lexer.addTrivia(Trivia.createComment(commentToken));

    // Empty the scratch buffer so it is clean for the next call.
    tmpBuilder.delete(0, tmpBuilder.length());
    return true;
  } catch (StackOverflowError e) {
    throw new LexerException(
      "The regular expression " + regexp + " has led to a stack overflow error. "
        + "This error is certainly due to an inefficient use of alternations. See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5050507",
      e);
  }
}
}
/**
 * Attempts to read one token of this channel's {@code type} at the reader's
 * current position.
 *
 * <p>When {@code popTo} reports more than zero consumed characters, the text is
 * turned into a token located at the reader's previous cursor and added to the
 * lexer. The scratch buffer is emptied afterwards.
 *
 * @param code  reader to consume from
 * @param lexer lexer receiving the token
 * @return {@code true} if a token was consumed, {@code false} otherwise
 * @throws LexerException if evaluating the regular expression overflows the stack
 */
@Override
public boolean consume(CodeReader code, Lexer lexer) {
  try {
    if (code.popTo(matcher, tmpBuilder) <= 0) {
      return false;
    }

    String text = tmpBuilder.toString();
    int line = code.getPreviousCursor().getLine();
    int column = code.getPreviousCursor().getColumn();

    Token matched = tokenBuilder
      .setType(type)
      .setValueAndOriginalValue(text)
      .setURI(lexer.getURI())
      .setLine(line)
      .setColumn(column)
      .build();
    lexer.addToken(matched);

    // Empty the scratch buffer so it is clean for the next call.
    tmpBuilder.delete(0, tmpBuilder.length());
    return true;
  } catch (StackOverflowError e) {
    throw new LexerException(
      "The regular expression " + regexp + " has led to a stack overflow error. "
        + "This error is certainly due to an inefficient use of alternations. See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5050507",
      e);
  }
}
}
/**
 * Attempts to read one token of this channel's {@code type} at the reader's
 * current position.
 *
 * <p>When {@code popTo} reports more than zero consumed characters, the text is
 * turned into a token located at the reader's previous cursor and added to the
 * lexer. The scratch buffer is emptied afterwards.
 *
 * @param code  reader to consume from
 * @param lexer lexer receiving the token
 * @return {@code true} if a token was consumed, {@code false} otherwise
 * @throws LexerException if evaluating the regular expression overflows the stack
 */
@Override
public boolean consume(CodeReader code, Lexer lexer) {
  try {
    if (code.popTo(matcher, tmpBuilder) <= 0) {
      return false;
    }

    String text = tmpBuilder.toString();
    int line = code.getPreviousCursor().getLine();
    int column = code.getPreviousCursor().getColumn();

    Token matched = tokenBuilder
      .setType(type)
      .setValueAndOriginalValue(text)
      .setURI(lexer.getURI())
      .setLine(line)
      .setColumn(column)
      .build();
    lexer.addToken(matched);

    // Empty the scratch buffer so it is clean for the next call.
    tmpBuilder.delete(0, tmpBuilder.length());
    return true;
  } catch (StackOverflowError e) {
    throw new LexerException(
      "The regular expression " + regexp + " has led to a stack overflow error. "
        + "This error is certainly due to an inefficient use of alternations. See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5050507",
      e);
  }
}
}
/**
 * Attempts to read one token of this channel's {@code type} at the reader's
 * current position.
 *
 * <p>When {@code popTo} reports more than zero consumed characters, the text is
 * turned into a token located at the reader's previous cursor and added to the
 * lexer. The scratch buffer is emptied afterwards.
 *
 * @param code  reader to consume from
 * @param lexer lexer receiving the token
 * @return {@code true} if a token was consumed, {@code false} otherwise
 * @throws LexerException if evaluating the regular expression overflows the stack
 */
@Override
public boolean consume(CodeReader code, Lexer lexer) {
  try {
    if (code.popTo(matcher, tmpBuilder) <= 0) {
      return false;
    }

    String text = tmpBuilder.toString();
    int line = code.getPreviousCursor().getLine();
    int column = code.getPreviousCursor().getColumn();

    Token matched = tokenBuilder
      .setType(type)
      .setValueAndOriginalValue(text)
      .setURI(lexer.getURI())
      .setLine(line)
      .setColumn(column)
      .build();
    lexer.addToken(matched);

    // Empty the scratch buffer so it is clean for the next call.
    tmpBuilder.delete(0, tmpBuilder.length());
    return true;
  } catch (StackOverflowError e) {
    throw new LexerException(
      "The regular expression " + regexp + " has led to a stack overflow error. "
        + "This error is certainly due to an inefficient use of alternations. See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5050507",
      e);
  }
}
}
/**
 * Attempts to read a string literal starting at the reader's current position.
 *
 * <p>The start line/column are captured first. {@code index} is reset and
 * {@code readStringPrefix} is invoked; the channel gives up unless {@code ch}
 * is then a single or double quote and {@code read} succeeds. On success,
 * {@code index} characters are popped from the reader into the buffer and
 * emitted as one {@code PythonTokenType.STRING} token at the captured position.
 * NOTE(review): {@code ch} and {@code index} are presumably updated by
 * {@code readStringPrefix}/{@code read}; confirm against those helpers.
 *
 * @param code   reader to consume from
 * @param output lexer receiving the string token
 * @return {@code true} if a string literal was consumed, {@code false} otherwise
 */
@Override
public boolean consume(CodeReader code, Lexer output) {
  final int startLine = code.getLinePosition();
  final int startColumn = code.getColumnPosition();

  index = 0;
  readStringPrefix(code);

  boolean startsWithQuote = (ch == '\'') || (ch == '\"');
  // read(code) is only attempted once an opening quote has been seen.
  if (!startsWithQuote || !read(code)) {
    return false;
  }

  int popped = 0;
  while (popped < index) {
    sb.append((char) code.pop());
    popped++;
  }

  output.addToken(Token.builder()
    .setLine(startLine)
    .setColumn(startColumn)
    .setURI(output.getURI())
    .setValueAndOriginalValue(sb.toString())
    .setType(PythonTokenType.STRING)
    .build());

  // Reset the shared buffer for the next literal.
  sb.setLength(0);
  return true;
}
/**
 * Attempts to read a string literal starting at the reader's current position.
 *
 * <p>The start line/column are captured first. {@code index} is reset and
 * {@code readStringPrefix} is invoked; the channel gives up unless {@code ch}
 * is then a single or double quote and {@code read} succeeds. On success,
 * {@code index} characters are popped from the reader into the buffer and
 * emitted as one {@code PythonTokenType.STRING} token at the captured position.
 * NOTE(review): {@code ch} and {@code index} are presumably updated by
 * {@code readStringPrefix}/{@code read}; confirm against those helpers.
 *
 * @param code   reader to consume from
 * @param output lexer receiving the string token
 * @return {@code true} if a string literal was consumed, {@code false} otherwise
 */
@Override
public boolean consume(CodeReader code, Lexer output) {
  final int startLine = code.getLinePosition();
  final int startColumn = code.getColumnPosition();

  index = 0;
  readStringPrefix(code);

  boolean startsWithQuote = (ch == '\'') || (ch == '\"');
  // read(code) is only attempted once an opening quote has been seen.
  if (!startsWithQuote || !read(code)) {
    return false;
  }

  int popped = 0;
  while (popped < index) {
    sb.append((char) code.pop());
    popped++;
  }

  output.addToken(Token.builder()
    .setLine(startLine)
    .setColumn(startColumn)
    .setURI(output.getURI())
    .setValueAndOriginalValue(sb.toString())
    .setType(PythonTokenType.STRING)
    .build());

  // Reset the shared buffer for the next literal.
  sb.setLength(0);
  return true;
}
/**
 * Splits {@code sourceCode} into mock tokens.
 *
 * <p>Runs of letters, digits, underscores and the characters {@code + - * /}
 * form a single token; the exact text {@code EOF} yields an {@code EOF} token,
 * anything else an {@code IDENTIFIER}. Whitespace characters are skipped. Any
 * other single character becomes its own {@code IDENTIFIER} token. Each token
 * records the line and column the reader reported before it was consumed.
 *
 * @param sourceCode text to tokenize
 * @return the tokens in source order
 */
public static List<Token> lex(String sourceCode) {
  List<Token> tokens = new ArrayList<>();
  CodeReader reader = new CodeReader(sourceCode);
  Matcher matcher = Pattern.compile("[a-zA-Z_0-9\\+\\-\\*/]+").matcher("");

  while (reader.peek() != -1) {
    int line = reader.getLinePosition();
    int column = reader.getColumnPosition();
    StringBuilder buffer = new StringBuilder();

    if (reader.popTo(matcher, buffer) == -1) {
      // No word-like run here: either skip whitespace or emit the lone character.
      if (Character.isWhitespace(reader.peek())) {
        reader.pop();
        continue;
      }
      tokens.add(mockTokenBuilder(IDENTIFIER, Character.toString((char) reader.pop()))
        .setLine(line)
        .setColumn(column)
        .build());
      continue;
    }

    String text = buffer.toString();
    Token token;
    if ("EOF".equals(text)) {
      token = mockTokenBuilder(EOF, text).setLine(line).setColumn(column).build();
    } else {
      token = mockTokenBuilder(IDENTIFIER, text).setLine(line).setColumn(column).build();
    }
    tokens.add(token);
  }

  return tokens;
}