/**
 * Creates a new {@link TokenChunker}, handing this builder to its constructor.
 *
 * @return the newly created chunker
 */
public TokenChunker build() {
  final TokenChunker chunker = new TokenChunker(this);
  return chunker;
}
/**
 * Static factory for {@link Builder}.
 *
 * @return a new, independent builder instance on every call
 */
public static Builder builder() {
  final Builder fresh = new Builder();
  return fresh;
}
public static TokenChunker build() { return TokenChunker.builder() .ignore("\\s") .ignore("//[^\\n\\r]*+") .ignore("/\\*[\\s\\S]*?\\*/") .token("\"([^\"\\\\]*+(\\\\[\\s\\S])?+)*+\"", NORMALIZED_CHARACTER_LITERAL) .token("'([^'\\n\\\\]*+(\\\\.)?+)*+'", NORMALIZED_CHARACTER_LITERAL) .token("\\p{javaJavaIdentifierStart}++\\p{javaJavaIdentifierPart}*+") .token("[0-9_]++\\.([0-9_]++)?+" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) .token("\\.[0-9_]++" + EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) .token("[0-9_]++" + EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) .token("0[xX][0-9a-fA-F_]++\\.[0-9a-fA-F_]*+" + BINARY_EXP + "?+" + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) .token("0[xX][0-9a-fA-F_]++" + BINARY_EXP + FLOAT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) .token("0[xX][0-9a-fA-F_]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) .token("0[bB][01_]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL) .token("[0-9_]++" + INT_SUFFIX + "?+", NORMALIZED_NUMERIC_LITERAL)
/**
 * A channel built for "ABC" must consume that prefix of "ABCD", emit exactly one
 * token positioned at line 1 / column 0, and leave the reader at line 1 / column 3.
 */
@Test
public void shouldConsume() {
  CodeReader reader = new CodeReader("ABCD");
  TokenQueue queue = mock(TokenQueue.class);
  TokenChannel underTest = new TokenChannel("ABC");

  boolean consumed = underTest.consume(reader, queue);
  assertThat(consumed, is(true));

  // Exactly one token must have been added to the queue, and nothing else.
  ArgumentCaptor<Token> captor = ArgumentCaptor.forClass(Token.class);
  verify(queue).add(captor.capture());
  Token expected = new Token("ABC", 1, 0);
  assertThat(captor.getValue(), is(expected));
  verifyNoMoreInteractions(queue);

  // The reader has moved past the three matched characters.
  assertThat(reader.getLinePosition(), is(1));
  assertThat(reader.getColumnPosition(), is(3));
}
/**
 * Convenience overload: wraps the given source text in a {@link StringReader}
 * and delegates to the reader-based {@code chunk} overload.
 *
 * @param sourceCode source text to split into tokens
 * @return whatever the delegated {@code chunk} call returns
 */
public TokenQueue chunk(String sourceCode) {
  final StringReader reader = new StringReader(sourceCode);
  return chunk(reader);
}
/**
 * Consumes the next token only when the queue reports that its value equals
 * {@code tokenToMatch}.
 *
 * @param tokenQueue       queue the next token is taken from
 * @param matchedTokenList receives the polled token on success
 * @return {@code true} if a token was moved to {@code matchedTokenList},
 *         {@code false} if the queue was left untouched
 */
@Override
public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
  if (!tokenQueue.isNextTokenValue(tokenToMatch)) {
    return false;
  }
  matchedTokenList.add(tokenQueue.poll());
  return true;
}
@Override public boolean consume(CodeReader code, TokenQueue output) { if (code.popTo(matcher, tmpBuilder) > 0) { // see SONAR-2499 Cursor previousCursor = code.getPreviousCursor(); if (normalizationValue != null) { output.add(new Token(normalizationValue, previousCursor.getLine(), previousCursor.getColumn())); } else { output.add(new Token(tmpBuilder.toString(), previousCursor.getLine(), previousCursor.getColumn())); } // Godin: note that other channels use method delete in order to do the same thing tmpBuilder.setLength(0); return true; } return false; }
/**
 * Rebuilds the shared {@code tokenQueue} fixture before each test with three
 * tokens on line 1: "a" at column 0, "bc" at column 2 and "def" at column 5.
 */
@Before
public void initTest() {
  final String[] values = {"a", "bc", "def"};
  final int[] columns = {0, 2, 5};

  List<Token> fixture = new ArrayList<>();
  for (int i = 0; i < values.length; i++) {
    fixture.add(new Token(values[i], 1, columns[i]));
  }
  tokenQueue = new TokenQueue(fixture);
}
/**
 * Registers a channel that treats every character sequence matching the given
 * regular expression as a token.
 *
 * @param regularExpression pattern describing the token
 * @return this builder, to allow call chaining
 */
public Builder token(String regularExpression) {
  final TokenChannel channel = new TokenChannel(regularExpression);
  channelDispatcherBuilder.addChannel(channel);
  return this;
}
/**
 * Unconditionally polls one element from the queue and appends it to the
 * matched list.
 *
 * @param tokenQueue       queue to poll from
 * @param matchedTokenList list that receives the polled element
 * @return always true
 */
@Override
public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
  final Token next = tokenQueue.poll();
  matchedTokenList.add(next);
  return true;
}
/**
 * Gives the most recently matched token back to the queue: the last element of
 * {@code matchedTokenList} is pushed forward onto {@code tokenQueue} and then
 * removed from the list.
 *
 * @param tokenQueue       queue that gets the token back
 * @param matchedTokenList list whose last element is handed back and dropped
 * @return always true
 */
@Override
public boolean matchToken(TokenQueue tokenQueue, List<Token> matchedTokenList) {
  final int lastIndex = matchedTokenList.size() - 1;
  final Token forgotten = matchedTokenList.get(lastIndex);
  tokenQueue.pushForward(Collections.singletonList(forgotten));
  // lastIndex is an int, so this is remove-by-index, not remove-by-object.
  matchedTokenList.remove(lastIndex);
  return true;
}
/**
 * Registers a channel for character sequences that match the given regular
 * expression and must be ignored.
 *
 * @param regularExpression pattern describing the text to ignore
 * @return this builder, to allow call chaining
 */
public Builder ignore(String regularExpression) {
  final BlackHoleTokenChannel blackHole = new BlackHoleTokenChannel(regularExpression);
  channelDispatcherBuilder.addChannel(blackHole);
  return this;
}
/**
 * A channel built with a normalization value must emit that value, not the
 * matched text, as the token content; token position and reader position are
 * the same as for a plain match of "ABC" in "ABCD".
 */
@Test
public void shouldNormalize() {
  CodeReader reader = new CodeReader("ABCD");
  TokenQueue queue = mock(TokenQueue.class);
  TokenChannel underTest = new TokenChannel("ABC", "normalized");

  boolean consumed = underTest.consume(reader, queue);
  assertThat(consumed, is(true));

  // Exactly one token, carrying the normalized value, must have been added.
  ArgumentCaptor<Token> captor = ArgumentCaptor.forClass(Token.class);
  verify(queue).add(captor.capture());
  Token expected = new Token("normalized", 1, 0);
  assertThat(captor.getValue(), is(expected));
  verifyNoMoreInteractions(queue);

  // The reader still advances by the length of the matched text.
  assertThat(reader.getLinePosition(), is(1));
  assertThat(reader.getColumnPosition(), is(3));
}
/**
 * Test helper: runs the source text through {@code tokenChunker} and feeds the
 * result to {@code statementChunker}.
 *
 * @param sourceCode source text to process
 * @return the statements returned by {@code statementChunker.chunk}
 */
private List<Statement> chunk(String sourceCode) { return statementChunker.chunk(tokenChunker.chunk(sourceCode)); }
/**
 * Registers a channel that treats every character sequence matching the given
 * regular expression as a token with the specified value.
 *
 * @param regularExpression  pattern describing the token
 * @param normalizationValue value passed to the channel as the token's value
 * @return this builder, to allow call chaining
 */
public Builder token(String regularExpression, String normalizationValue) {
  final TokenChannel channel = new TokenChannel(regularExpression, normalizationValue);
  channelDispatcherBuilder.addChannel(channel);
  return this;
}
/**
 * A token that contains a line break must be reported at the position where it
 * starts (line 1 / column 0), while the reader ends up on line 2 / column 1.
 */
@Test
public void shouldCorrectlyDeterminePositionWhenTokenSpansMultipleLines() {
  CodeReader reader = new CodeReader("AB\nCD");
  TokenQueue queue = mock(TokenQueue.class);
  TokenChannel underTest = new TokenChannel("AB\nC");

  boolean consumed = underTest.consume(reader, queue);
  assertThat(consumed, is(true));

  // The single emitted token keeps its starting position.
  ArgumentCaptor<Token> captor = ArgumentCaptor.forClass(Token.class);
  verify(queue).add(captor.capture());
  Token expected = new Token("AB\nC", 1, 0);
  assertThat(captor.getValue(), is(expected));
  verifyNoMoreInteractions(queue);

  // The reader is now one character into the second line.
  assertThat(reader.getLinePosition(), is(2));
  assertThat(reader.getColumnPosition(), is(1));
}