/**
 * Runs the full detection sequence and stores the result in {@code matches}.
 *
 * <p>Steps, each announced to {@code cpdListener}:
 * <ol>
 *   <li>{@code HASH}: obtain the mark groups from {@code hash()}.</li>
 *   <li>{@code MATCH}: hand every group whose value is a {@code List} to a
 *       {@link MatchCollector}, after reversing that list in place.</li>
 *   <li>{@code GROUPING}: take the collected matches and set a line count and
 *       a {@code SourceCode} on every mark of every match.</li>
 *   <li>{@code DONE}.</li>
 * </ol>
 */
public void findMatches() {
    cpdListener.phaseUpdate(CPDListener.HASH);
    Map<TokenEntry, Object> markGroups = hash();

    cpdListener.phaseUpdate(CPDListener.MATCH);
    MatchCollector matchCollector = new MatchCollector(this);
    Iterator<Object> groupIterator = markGroups.values().iterator();
    while (groupIterator.hasNext()) {
        Object group = groupIterator.next();
        if (group instanceof List) {
            @SuppressWarnings("unchecked")
            List<TokenEntry> entries = (List<TokenEntry>) group;
            // NOTE(review): presumably hash() builds each list in the opposite
            // order from what collect() expects - confirm.
            Collections.reverse(entries);
            matchCollector.collect(entries);
        }
        // Every group, list or not, is dropped from the map once visited.
        groupIterator.remove();
    }

    cpdListener.phaseUpdate(CPDListener.GROUPING);
    matches = matchCollector.getMatches();
    for (Match match : matches) {
        for (Mark mark : match) {
            TokenEntry token = mark.getToken();

            int lineCount = tokens.getLineCount(token, match);
            mark.setLineCount(lineCount);

            SourceCode sourceCode = source.get(token.getTokenSrcID());
            mark.setSourceCode(sourceCode);
        }
    }

    cpdListener.phaseUpdate(CPDListener.DONE);
}
/**
 * Records that {@code mark1} and {@code mark2} start a duplicated run of
 * {@code dupes} tokens.
 *
 * <p>{@code matchTree} maps a duplicate count to a map from token index to the
 * {@code Match} registered for that index. If neither mark is registered for
 * this count, a new match is created via {@code addNewMatch}. If exactly one
 * is registered, the other mark is added to that existing match and indexed
 * under it. If both are already registered, nothing is changed.
 *
 * @param mark1 first token entry of the pair
 * @param mark2 second token entry of the pair
 * @param dupes duplicate count for the pair; used as the key into {@code matchTree}
 */
private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
    Map<Integer, Match> matches = matchTree.get(dupes);
    if (matches == null) {
        // First pair seen with this duplicate count: create its index map.
        matches = new TreeMap<>();
        matchTree.put(dupes, matches);
        addNewMatch(mark1, mark2, dupes, matches);
        return;
    }

    // Reuse the map already fetched above instead of looking it up again.
    Match matchA = matches.get(mark1.getIndex());
    Match matchB = matches.get(mark2.getIndex());

    if (matchA == null && matchB == null) {
        addNewMatch(mark1, mark2, dupes, matches);
    } else if (matchA == null) {
        matchB.addTokenEntry(mark1);
        matches.put(mark1.getIndex(), matchB);
    } else if (matchB == null) {
        matchA.addTokenEntry(mark2);
        matches.put(mark2.getIndex(), matchA);
    }
    // Both marks already belong to a match of this size: nothing to record.
}
public void collect(List<TokenEntry> marks) { // first get a pairwise collection of all maximal matches for (int i = 0; i < marks.size() - 1; i++) { TokenEntry mark1 = marks.get(i); for (int j = i + 1; j < marks.size(); j++) { TokenEntry mark2 = marks.get(j); int diff = mark1.getIndex() - mark2.getIndex(); if (-diff < ma.getMinimumTileSize()) { continue; } if (hasPreviousDupe(mark1, mark2)) { continue; } // "match too small" check int dupes = countDuplicateTokens(mark1, mark2); if (dupes < ma.getMinimumTileSize()) { continue; } // is it still too close together if (diff + dupes >= 1) { continue; } reportMatch(mark1, mark2, dupes); } } }
/**
 * Counts how many consecutive offsets, starting at 0, can be stepped through
 * before {@code matchEnded} returns true for the tokens that
 * {@code ma.tokenAt} yields at that offset for {@code mark1} and {@code mark2}.
 *
 * <p>The scan has no explicit upper bound; it relies on {@code matchEnded}
 * eventually returning true.
 *
 * @return the first offset at which {@code matchEnded} returns true
 */
private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) {
    for (int offset = 0;; offset++) {
        if (matchEnded(ma.tokenAt(offset, mark1), ma.tokenAt(offset, mark2))) {
            return offset;
        }
    }
}
/**
 * Tells whether the tokens at offset -1 from both marks still count as matching.
 *
 * @return {@code false} when {@code mark1} has index 0; otherwise the negation
 *         of {@code matchEnded} applied to the tokens at offset -1 from each mark
 */
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
    // Short-circuit: tokenAt(-1, ...) is never evaluated when mark1 is at index 0.
    return mark1.getIndex() != 0
            && !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
}
public void collect(List<TokenEntry> marks) { // first get a pairwise collection of all maximal matches for (int i = 0; i < marks.size() - 1; i++) { TokenEntry mark1 = marks.get(i); for (int j = i + 1; j < marks.size(); j++) { TokenEntry mark2 = marks.get(j); int diff = mark1.getIndex() - mark2.getIndex(); if (-diff < ma.getMinimumTileSize()) { continue; } if (hasPreviousDupe(mark1, mark2)) { continue; } // "match too small" check int dupes = countDuplicateTokens(mark1, mark2); if (dupes < ma.getMinimumTileSize()) { continue; } // is it still too close together if (diff + dupes >= 1) { continue; } reportMatch(mark1, mark2, dupes); } } }
/**
 * Executes the hash, match and grouping phases in sequence, reporting each
 * phase (and final completion) to {@code cpdListener}, and leaves the
 * resulting matches in the {@code matches} field.
 */
public void findMatches() {
    // Phase 1: build the groups of marks.
    cpdListener.phaseUpdate(CPDListener.HASH);
    Map<TokenEntry, Object> markGroups = hash();

    // Phase 2: feed each list-valued group to the collector.
    cpdListener.phaseUpdate(CPDListener.MATCH);
    MatchCollector matchCollector = new MatchCollector(this);
    // The update clause removes the group just visited from the map; it runs
    // after every pass through the body, whether or not the group was a list.
    for (Iterator<Object> it = markGroups.values().iterator(); it.hasNext(); it.remove()) {
        Object value = it.next();
        if (value instanceof List) {
            @SuppressWarnings("unchecked")
            List<TokenEntry> group = (List<TokenEntry>) value;
            // The list is reversed in place before being collected.
            Collections.reverse(group);
            matchCollector.collect(group);
        }
    }

    // Phase 3: fetch the matches and fill in per-mark details.
    cpdListener.phaseUpdate(CPDListener.GROUPING);
    matches = matchCollector.getMatches();
    for (Match match : matches) {
        for (Mark mark : match) {
            TokenEntry token = mark.getToken();
            mark.setLineCount(tokens.getLineCount(token, match));
            mark.setSourceCode(source.get(token.getTokenSrcID()));
        }
    }

    cpdListener.phaseUpdate(CPDListener.DONE);
}
/**
 * Returns the smallest non-negative offset at which {@code matchEnded} is true
 * for the pair of tokens {@code ma.tokenAt(offset, mark1)} and
 * {@code ma.tokenAt(offset, mark2)}.
 *
 * <p>Offsets are probed in increasing order starting at 0. Termination depends
 * on {@code matchEnded} eventually returning true.
 */
private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) {
    // Start one below zero so the first probe happens at offset 0.
    int offset = -1;
    do {
        offset++;
    } while (!matchEnded(ma.tokenAt(offset, mark1), ma.tokenAt(offset, mark2)));
    return offset;
}
/**
 * Registers the pair {@code mark1}/{@code mark2} under the duplicate count
 * {@code dupes}.
 *
 * <p>{@code matchTree} is keyed by duplicate count; each value maps a token
 * index to the {@code Match} registered for that index. Outcomes:
 * <ul>
 *   <li>neither mark registered: {@code addNewMatch} creates a new match;</li>
 *   <li>only one mark registered: the other mark is added to that match and
 *       indexed under it;</li>
 *   <li>both marks registered: no change.</li>
 * </ul>
 *
 * @param mark1 first token entry of the pair
 * @param mark2 second token entry of the pair
 * @param dupes duplicate count for the pair; key into {@code matchTree}
 */
private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
    Map<Integer, Match> byIndex = matchTree.get(dupes);
    if (byIndex == null) {
        // No pair with this duplicate count yet. The lookups below will both
        // miss on the empty map, so the pair falls through to addNewMatch.
        byIndex = new TreeMap<>();
        matchTree.put(dupes, byIndex);
    }

    // Use the map fetched once above rather than re-querying matchTree.
    Match existingForMark1 = byIndex.get(mark1.getIndex());
    Match existingForMark2 = byIndex.get(mark2.getIndex());

    if (existingForMark1 == null) {
        if (existingForMark2 == null) {
            addNewMatch(mark1, mark2, dupes, byIndex);
        } else {
            existingForMark2.addTokenEntry(mark1);
            byIndex.put(mark1.getIndex(), existingForMark2);
        }
    } else if (existingForMark2 == null) {
        existingForMark1.addTokenEntry(mark2);
        byIndex.put(mark2.getIndex(), existingForMark1);
    }
    // Otherwise both marks are already registered; leave the maps untouched.
}
/**
 * Checks the tokens at offset -1 from {@code mark1} and {@code mark2}.
 *
 * @return {@code true} if {@code mark1} is not at index 0 and
 *         {@code matchEnded} is false for the two tokens at offset -1;
 *         {@code false} otherwise
 */
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
    if (mark1.getIndex() != 0) {
        boolean endedBefore = matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
        return !endedBefore;
    }
    // mark1 is at index 0, so the offset -1 lookup is skipped.
    return false;
}