@Before
public void setUp() {
    Tokenizer tokenizer = new Tokenizer() {
        @Override
        public void tokenize(SourceCode tokens, Tokens tokenEntries) throws IOException {
            tokenEntries.add(new TokenEntry("t1", "src", 1));
            tokenEntries.add(new TokenEntry("t2", "src", 1));
            tokenEntries.add(new TokenEntry("t3", "src", 2));
            tokenEntries.add(new TokenEntry("t1", "src", 4));
            tokenEntries.add(new TokenEntry("t3", "src", 4));
            tokenEntries.add(new TokenEntry("t3", "src", 4));
            tokenEntries.add(TokenEntry.getEOF());
        }
    };
    bridge = new TokenizerBridge(tokenizer, 10);
}
@Override
public void tokenize(SourceCode tokens, Tokens tokenEntries) {
    List<String> code = tokens.getCode();
    for (int i = 0; i < code.size(); i++) {
        String currentLine = code.get(i);
        for (int j = 0; j < currentLine.length(); j++) {
            char tok = currentLine.charAt(j);
            // Emit one token per character, skipping whitespace and block/statement delimiters.
            if (!Character.isWhitespace(tok) && tok != '{' && tok != '}' && tok != ';') {
                tokenEntries.add(new TokenEntry(String.valueOf(tok), tokens.getFileName(), i + 1));
            }
        }
    }
    tokenEntries.add(TokenEntry.getEOF());
}
}
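A quick way to smoke-test the character-level tokenizer above is to feed it an in-memory SourceCode and count the resulting entries. This is a minimal sketch, assuming PMD's SourceCode.StringCodeLoader and Tokens.getTokens() are on the classpath; CharTokenizer is a hypothetical name for the class defined above:

import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.Tokens;

public class CharTokenizerSmokeTest {
    public static void main(String[] args) {
        // "int x;" and "int y;" each contribute four character tokens;
        // whitespace and the ';' delimiter are skipped.
        SourceCode sourceCode = new SourceCode(
                new SourceCode.StringCodeLoader("int x;\nint y;", "Sample.java"));
        Tokens tokens = new Tokens();
        new CharTokenizer().tokenize(sourceCode, tokens);
        // Should print 9: eight character tokens plus the trailing EOF marker.
        System.out.println(tokens.getTokens().size());
    }
}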
private void processToken(Tokens tokenEntries, String fileName, Token currentToken,
        ConstructorDetector constructorDetector) {
    String image = currentToken.image;

    constructorDetector.restoreConstructorToken(tokenEntries, currentToken);

    if (ignoreLiterals && (currentToken.kind == JavaParserConstants.STRING_LITERAL
            || currentToken.kind == JavaParserConstants.CHARACTER_LITERAL
            || currentToken.kind == JavaParserConstants.DECIMAL_LITERAL
            || currentToken.kind == JavaParserConstants.FLOATING_POINT_LITERAL)) {
        // Replace the literal's text with its token kind so differing literals still match.
        image = String.valueOf(currentToken.kind);
    }
    if (ignoreIdentifiers && currentToken.kind == JavaParserConstants.IDENTIFIER) {
        image = String.valueOf(currentToken.kind);
    }

    constructorDetector.processToken(currentToken);

    tokenEntries.add(new TokenEntry(image, fileName, currentToken.beginLine));
}
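The normalization above is what lets CPD report duplicates that differ only in literal values or identifier names. A minimal sketch of exercising it, assuming the enclosing class is PMD's JavaTokenizer and that its setIgnoreLiterals(boolean) setter is available:

import net.sourceforge.pmd.cpd.JavaTokenizer;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.Tokens;

public class IgnoreLiteralsDemo {
    public static void main(String[] args) throws Exception {
        JavaTokenizer tokenizer = new JavaTokenizer();
        tokenizer.setIgnoreLiterals(true);

        // The two string literals differ, but with ignoreLiterals enabled both
        // are emitted as the literal's token kind, so they match in CPD.
        SourceCode a = new SourceCode(new SourceCode.StringCodeLoader("class A { String s = \"x\"; }", "A.java"));
        SourceCode b = new SourceCode(new SourceCode.StringCodeLoader("class B { String s = \"y\"; }", "B.java"));

        Tokens tokensA = new Tokens();
        tokenizer.tokenize(a, tokensA);
        Tokens tokensB = new Tokens();
        tokenizer.tokenize(b, tokensB);
        System.out.println(tokensA.getTokens().size() + " / " + tokensB.getTokens().size());
    }
}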
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder sb = sourceCode.getCodeBuffer();
    try (BufferedReader reader = new BufferedReader(new CharArrayReader(sb.toString().toCharArray()))) {
        int lineNumber = 1;
        String line = reader.readLine();
        while (line != null) {
            StringTokenizer tokenizer = new StringTokenizer(line, TOKENS, true);
            while (tokenizer.hasMoreTokens()) {
                String token = tokenizer.nextToken();
                if (!" ".equals(token) && !"\t".equals(token)) {
                    tokenEntries.add(new TokenEntry(token, sourceCode.getFileName(), lineNumber));
                }
            }
            // advance iteration variables
            line = reader.readLine();
            lineNumber++;
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        tokenEntries.add(TokenEntry.getEOF());
    }
}
}
@Override
public void tokenize(SourceCode tokens, Tokens tokenEntries) {
    code = tokens.getCode();
    for (lineNumber = 0; lineNumber < code.size(); lineNumber++) {
        currentLine = code.get(lineNumber);
        int loc = 0;
        while (loc < currentLine.length()) {
            StringBuilder token = new StringBuilder();
            loc = getTokenFromLine(token, loc);
            if (token.length() > 0 && !isIgnorableString(token.toString())) {
                if (downcaseString) {
                    token = new StringBuilder(token.toString().toLowerCase(Locale.ROOT));
                }
                // need to re-think how to link this
                // if ( CPD.debugEnable ) {
                //     System.out.println("Token added:" + token.toString());
                // }
                tokenEntries.add(new TokenEntry(token.toString(), tokens.getFileName(), lineNumber + 1));
            }
        }
    }
    tokenEntries.add(TokenEntry.getEOF());
}
tokenEntries.add(new TokenEntry(token.image, sourceCode.getFileName(), token.lineNumber));
@Test
public void shouldClearCacheInTokenEntry() {
    bridge.chunk("file.txt", new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8));
    TokenEntry token = new TokenEntry("image", "srcId", 0);
    assertThat(token.getIndex(), is(0));
    assertThat(token.getIdentifier(), is(1));
}
@Test
public void testNewTokenEntry() {
    TokenEntry entry = new TokenEntry("token1", "src1", 1);
    assertThat(entry.getValue(), equalTo("token1"));
    assertThat(entry.getBeginLine(), equalTo(1));

    entry = new TokenEntry("token2", "src2", 2);
    assertThat(entry.getValue(), equalTo("token2"));
    assertThat(entry.getBeginLine(), equalTo(2));
}
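Identifiers are assigned by interning the token image, so entries with equal images share an identifier (this is what the getIdentifier() assertion in the cache-clearing test above relies on). A minimal sketch in the same test style, assuming the static TokenEntry.clearImages() reset that PMD's TokenEntry provides, used here to make the test order-independent:

@Test
public void sameImageSharesIdentifier() {
    TokenEntry.clearImages(); // reset the static image-to-identifier map
    TokenEntry first = new TokenEntry("foo", "src", 1);
    TokenEntry second = new TokenEntry("foo", "src", 5);
    TokenEntry other = new TokenEntry("bar", "src", 9);
    // Same image, same identifier; a different image gets a different one.
    assertThat(second.getIdentifier(), equalTo(first.getIdentifier()));
    assertThat(other.getIdentifier(), not(equalTo(first.getIdentifier())));
}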
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder buffer = sourceCode.getCodeBuffer();
    LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(JspLanguageModule.NAME)
            .getDefaultVersion().getLanguageVersionHandler();
    try (Reader reader = IOUtil.skipBOM(new StringReader(buffer.toString()))) {
        TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
                .getTokenManager(sourceCode.getFileName(), reader);
        Token currentToken = (Token) tokenMgr.getNextToken();
        while (currentToken.image.length() > 0) {
            tokenEntries.add(new TokenEntry(String.valueOf(currentToken.kind), sourceCode.getFileName(),
                    currentToken.beginLine));
            currentToken = (Token) tokenMgr.getNextToken();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    tokenEntries.add(TokenEntry.getEOF());
}
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder buffer = sourceCode.getCodeBuffer();
    try (Reader reader = new StringReader(buffer.toString())) {
        final TokenFilter tokenFilter = new JavaCCTokenFilter(new ObjectiveCTokenManager(reader));
        Token currentToken = (Token) tokenFilter.getNextToken();
        while (currentToken != null) {
            tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine));
            currentToken = (Token) tokenFilter.getNextToken();
        }
        tokenEntries.add(TokenEntry.getEOF());
        System.err.println("Added " + sourceCode.getFileName());
    } catch (TokenMgrError err) {
        err.printStackTrace();
        System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
        tokenEntries.add(TokenEntry.getEOF());
    } catch (IOException e) {
        e.printStackTrace();
    }
}
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder buffer = sourceCode.getCodeBuffer();
    LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(VfLanguageModule.NAME)
            .getDefaultVersion().getLanguageVersionHandler();
    try (Reader reader = IOUtil.skipBOM(new StringReader(buffer.toString()))) {
        TokenManager tokenMgr = languageVersionHandler.getParser(languageVersionHandler.getDefaultParserOptions())
                .getTokenManager(sourceCode.getFileName(), reader);
        Token currentToken = (Token) tokenMgr.getNextToken();
        while (currentToken.image.length() > 0) {
            tokenEntries.add(new TokenEntry(String.valueOf(currentToken.kind), sourceCode.getFileName(),
                    currentToken.beginLine));
            currentToken = (Token) tokenMgr.getNextToken();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    tokenEntries.add(TokenEntry.getEOF());
}
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder buffer = sourceCode.getCodeBuffer();
    try (Reader reader = IOUtil.skipBOM(new StringReader(buffer.toString()))) {
        final TokenFilter tokenFilter = new JavaCCTokenFilter(new MatlabTokenManager(reader));
        Token currentToken = (Token) tokenFilter.getNextToken();
        while (currentToken != null) {
            tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine));
            currentToken = (Token) tokenFilter.getNextToken();
        }
        tokenEntries.add(TokenEntry.getEOF());
        System.err.println("Added " + sourceCode.getFileName());
    } catch (TokenMgrError | IOException err) {
        err.printStackTrace();
        System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
        tokenEntries.add(TokenEntry.getEOF());
    }
}
}
@Override
public void tokenize(SourceCode source, Tokens cpdTokens) {
    String filename = source.getFileName();
    try {
        Lexer lexer = new Lexer();
        List<Token> tokens = lexer.getTokensOfFile(filename);
        for (Token token : tokens) {
            String tokenVal = token.tokenVal() != null ? token.tokenVal() : Integer.toString(token.tokenType());
            TokenEntry cpdToken = new TokenEntry(tokenVal, filename, token.line());
            cpdTokens.add(cpdToken);
        }
        cpdTokens.add(TokenEntry.getEOF());
    } catch (RuntimeException e) {
        e.printStackTrace();
        // Wrap exceptions of the Scala tokenizer in a TokenMgrError, so they are
        // correctly handled when CPD is executed with the '--skipLexicalErrors'
        // command line option.
        throw new TokenMgrError("Lexical error in file " + filename
                + ". The scala tokenizer exited with error: " + e.getMessage(), TokenMgrError.LEXICAL_ERROR);
    }
}
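From the caller's side, the TokenMgrError contract above means a driver can skip unparseable files and keep going. A minimal sketch of that handling, assuming PMD 6's net.sourceforge.pmd.lang.ast.TokenMgrError location (the class has moved between PMD versions):

import java.io.IOException;
import net.sourceforge.pmd.cpd.SourceCode;
import net.sourceforge.pmd.cpd.Tokenizer;
import net.sourceforge.pmd.cpd.Tokens;
import net.sourceforge.pmd.lang.ast.TokenMgrError;

public final class LenientTokenizing {
    /** Tokenize one file, skipping it on lexical errors as --skipLexicalErrors would. */
    public static Tokens tokenizeLeniently(Tokenizer tokenizer, SourceCode sourceCode) throws IOException {
        Tokens tokens = new Tokens();
        try {
            tokenizer.tokenize(sourceCode, tokens);
        } catch (TokenMgrError e) {
            // Log and move on; the caller can discard the partial token stream.
            System.err.println("Skipping " + sourceCode.getFileName() + ": " + e.getMessage());
        }
        return tokens;
    }
}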
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder buffer = sourceCode.getCodeBuffer();
    try (Reader reader = IOUtil.skipBOM(new StringReader(maybeSkipBlocks(buffer.toString())))) {
        final TokenFilter tokenFilter = new JavaCCTokenFilter(new CppTokenManager(reader));
        GenericToken currentToken = tokenFilter.getNextToken();
        while (currentToken != null) {
            tokenEntries.add(new TokenEntry(currentToken.getImage(), sourceCode.getFileName(),
                    currentToken.getBeginLine()));
            currentToken = tokenFilter.getNextToken();
        }
        tokenEntries.add(TokenEntry.getEOF());
        System.err.println("Added " + sourceCode.getFileName());
    } catch (TokenMgrError | IOException err) {
        err.printStackTrace();
        System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
        tokenEntries.add(TokenEntry.getEOF());
    }
}
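maybeSkipBlocks is referenced above but not shown. A minimal sketch of what it could look like, assuming hypothetical skipBlocks, skipBlocksStart, and skipBlocksEnd fields configured from CPD's C/C++ skip-blocks options (markers such as #if 0 / #endif), and preserving line numbers by emitting one output line per input line:

// Hedged sketch: field names are assumptions; the key invariant is that
// skipped blocks are replaced by blank lines so beginLine stays accurate.
private String maybeSkipBlocks(String content) throws IOException {
    if (!skipBlocks) {
        return content;
    }
    BufferedReader reader = new BufferedReader(new StringReader(content));
    StringBuilder filtered = new StringBuilder(content.length());
    String line;
    boolean inSkippedBlock = false;
    while ((line = reader.readLine()) != null) {
        if (skipBlocksStart.equals(line.trim())) {
            inSkippedBlock = true;
        } else if (inSkippedBlock && skipBlocksEnd.equals(line.trim())) {
            inSkippedBlock = false;
        } else if (!inSkippedBlock) {
            filtered.append(line);
        }
        filtered.append('\n'); // one line out per line in
    }
    return filtered.toString();
}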
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder buffer = sourceCode.getCodeBuffer();
    try (Reader reader = IOUtil.skipBOM(new StringReader(buffer.toString()))) {
        LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(PythonLanguageModule.NAME)
                .getDefaultVersion().getLanguageVersionHandler();
        TokenFilter tokenFilter = new JavaCCTokenFilter(languageVersionHandler
                .getParser(languageVersionHandler.getDefaultParserOptions())
                .getTokenManager(sourceCode.getFileName(), reader));
        Token currentToken = (Token) tokenFilter.getNextToken();
        while (currentToken != null) {
            tokenEntries.add(new TokenEntry(currentToken.image, sourceCode.getFileName(), currentToken.beginLine));
            currentToken = (Token) tokenFilter.getNextToken();
        }
        tokenEntries.add(TokenEntry.getEOF());
        System.err.println("Added " + sourceCode.getFileName());
    } catch (TokenMgrError | IOException err) {
        err.printStackTrace();
        System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
        tokenEntries.add(TokenEntry.getEOF());
    }
}
}
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder buffer = sourceCode.getCodeBuffer();
    try (Reader reader = new StringReader(buffer.toString())) {
        LanguageVersionHandler languageVersionHandler = LanguageRegistry.getLanguage(EcmascriptLanguageModule.NAME)
                .getDefaultVersion().getLanguageVersionHandler();
        TokenFilter tokenFilter = new JavaCCTokenFilter(languageVersionHandler
                .getParser(languageVersionHandler.getDefaultParserOptions())
                .getTokenManager(sourceCode.getFileName(), reader));
        Token currentToken = (Token) tokenFilter.getNextToken();
        while (currentToken != null) {
            tokenEntries.add(
                    new TokenEntry(getTokenImage(currentToken), sourceCode.getFileName(), currentToken.beginLine));
            currentToken = (Token) tokenFilter.getNextToken();
        }
        tokenEntries.add(TokenEntry.getEOF());
        System.err.println("Added " + sourceCode.getFileName());
    } catch (TokenMgrError err) {
        err.printStackTrace();
        System.err.println("Skipping " + sourceCode.getFileName() + " due to parse error");
        tokenEntries.add(TokenEntry.getEOF());
    } catch (IOException e) {
        e.printStackTrace();
    }
}
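getTokenImage is referenced above but not shown. A sketch consistent with the literal/identifier normalization in the Java processToken earlier in this section; the ignoreLiterals/ignoreIdentifiers fields and the EcmascriptParserConstants names are assumptions for illustration:

// Hedged sketch: constant names are hypothetical. Literals and identifiers
// are normalized to their token kind so CPD matches code that differs only
// in values or names.
private String getTokenImage(Token token) {
    if (ignoreLiterals && (token.kind == EcmascriptParserConstants.STRING_LITERAL
            || token.kind == EcmascriptParserConstants.DECIMAL_LITERAL)) {
        return String.valueOf(token.kind);
    }
    if (ignoreIdentifiers && token.kind == EcmascriptParserConstants.IDENTIFIER) {
        return String.valueOf(token.kind);
    }
    return token.image;
}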
tokenText = tokenText.toLowerCase(Locale.ROOT);
TokenEntry tokenEntry = new TokenEntry(tokenText, sourceCode.getFileName(), token.getLine());
tokenEntries.add(tokenEntry);
@Override
public void tokenize(SourceCode sourceCode, Tokens tokenEntries) {
    StringBuilder buffer = sourceCode.getCodeBuffer();
    GroovyLexer lexer = new GroovyLexer(new StringReader(buffer.toString()));
    TokenStream tokenStream = lexer.plumb();
    try {
        Token token = tokenStream.nextToken();
        while (token.getType() != Token.EOF_TYPE) {
            TokenEntry tokenEntry = new TokenEntry(token.getText(), sourceCode.getFileName(), token.getLine());
            tokenEntries.add(tokenEntry);
            token = tokenStream.nextToken();
        }
    } catch (TokenStreamException err) {
        // Wrap exceptions of the Groovy tokenizer in a TokenMgrError, so they are
        // correctly handled when CPD is executed with the '--skipLexicalErrors'
        // command line option.
        throw new TokenMgrError("Lexical error in file " + sourceCode.getFileName() + " at line "
                + lexer.getLine() + ", column " + lexer.getColumn() + ". Encountered: " + err.getMessage(),
                TokenMgrError.LEXICAL_ERROR);
    } finally {
        tokenEntries.add(TokenEntry.getEOF());
    }
}
}
@Override
public void tokenize(final SourceCode sourceCode, final Tokens tokenEntries) {
    AntlrTokenManager tokenManager = getLexerForSource(sourceCode);
    try {
        AntlrToken token = (AntlrToken) tokenManager.getNextToken();
        while (token.getType() != Token.EOF) {
            if (!token.isHidden()) {
                final TokenEntry tokenEntry = new TokenEntry(token.getImage(), tokenManager.getFileName(),
                        token.getBeginLine());
                tokenEntries.add(tokenEntry);
            }
            token = (AntlrToken) tokenManager.getNextToken();
        }
    } catch (final AntlrTokenManager.ANTLRSyntaxError err) {
        // Wrap exceptions of the ANTLR tokenizer in a TokenMgrError, so they are
        // correctly handled when CPD is executed with the '--skipLexicalErrors'
        // command line option.
        throw new TokenMgrError("Lexical error in file " + tokenManager.getFileName() + " at line "
                + err.getLine() + ", column " + err.getColumn() + ". Encountered: " + err.getMessage(),
                TokenMgrError.LEXICAL_ERROR);
    } finally {
        tokenEntries.add(TokenEntry.getEOF());
    }
}
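The getLexerForSource hook is where each ANTLR-based language supplies its generated lexer. A minimal sketch for a hypothetical Swift subclass, assuming an ANTLR-generated SwiftLexer and the AntlrTokenManager(Lexer, String) constructor shape implied by its use above; CharStream/CharStreams come from org.antlr.v4.runtime:

// Hedged sketch: SwiftLexer is a placeholder for any ANTLR-generated lexer.
private AntlrTokenManager getLexerForSource(final SourceCode sourceCode) {
    final CharStream charStream = CharStreams.fromString(sourceCode.getCodeBuffer().toString());
    return new AntlrTokenManager(new SwiftLexer(charStream), sourceCode.getFileName());
}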