/**
 * Looks up the token that sits {@code offset} positions away from the given entry.
 *
 * @param offset displacement added to {@code m.getIndex()}; may be negative
 * @param m      entry whose index anchors the lookup
 * @return whatever {@code code.get} yields for position {@code offset + m.getIndex()}
 */
public TokenEntry tokenAt(int offset, TokenEntry m) {
    int position = offset + m.getIndex();
    return code.get(position);
}
/**
 * Records that {@code mark1} and {@code mark2} share {@code dupes} duplicated tokens.
 *
 * <p>Matches are grouped in {@code matchTree} by duplicate length, and within each
 * group they are keyed by mark index. If neither mark is known for this length a new
 * match is created; if exactly one is known, the other mark is attached to that
 * existing match; if both are known nothing changes.
 *
 * @param mark1 first mark of the pair
 * @param mark2 second mark of the pair
 * @param dupes number of duplicated tokens counted for the pair
 */
private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
    Map<Integer, Match> bucket = matchTree.get(dupes);
    if (bucket == null) {
        // First match of this length: open a bucket and seed it with the pair.
        bucket = new TreeMap<>();
        matchTree.put(dupes, bucket);
        addNewMatch(mark1, mark2, dupes, bucket);
        return;
    }

    Match knownForFirst = bucket.get(mark1.getIndex());
    Match knownForSecond = bucket.get(mark2.getIndex());

    if (knownForFirst == null) {
        if (knownForSecond == null) {
            addNewMatch(mark1, mark2, dupes, bucket);
        } else {
            // Only mark2 is already part of a match: attach mark1 to it.
            knownForSecond.addTokenEntry(mark1);
            bucket.put(mark1.getIndex(), knownForSecond);
        }
    } else if (knownForSecond == null) {
        // Only mark1 is already part of a match: attach mark2 to it.
        knownForFirst.addTokenEntry(mark2);
        bucket.put(mark2.getIndex(), knownForFirst);
    }
    // Both marks already have a match of this length: nothing to record.
}
public void collect(List<TokenEntry> marks) { // first get a pairwise collection of all maximal matches for (int i = 0; i < marks.size() - 1; i++) { TokenEntry mark1 = marks.get(i); for (int j = i + 1; j < marks.size(); j++) { TokenEntry mark2 = marks.get(j); int diff = mark1.getIndex() - mark2.getIndex(); if (-diff < ma.getMinimumTileSize()) { continue; } if (hasPreviousDupe(mark1, mark2)) { continue; } // "match too small" check int dupes = countDuplicateTokens(mark1, mark2); if (dupes < ma.getMinimumTileSize()) { continue; } // is it still too close together if (diff + dupes >= 1) { continue; } reportMatch(mark1, mark2, dupes); } } }
/**
 * Tells whether the tokens immediately before the two marks still match.
 *
 * @param mark1 first mark; when its index is 0 there is no preceding token and the
 *              result is {@code false}
 * @param mark2 second mark
 * @return {@code true} when {@code mark1} is not at index 0 and {@code matchEnded}
 *         is false for the two tokens one position before each mark
 */
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
    return mark1.getIndex() != 0
            && !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
}
/**
 * Computes the index of the last token covered, taking mark 0 as the start.
 *
 * @return index of the token of mark 0, plus the token count, minus one
 */
public int getEndIndex() {
    int startIndex = getMark(0).getToken().getIndex();
    return startIndex + getTokenCount() - 1;
}
/**
 * Chunks an empty input through the bridge, then checks that a freshly created
 * {@code TokenEntry} reports index 0 and identifier 1.
 * NOTE(review): presumably {@code chunk} resets static state held by
 * {@code TokenEntry} - confirm against the bridge implementation.
 */
@Test
public void shouldClearCacheInTokenEntry() {
    InputStreamReader emptyInput =
            new InputStreamReader(new ByteArrayInputStream(new byte[0]), StandardCharsets.UTF_8);
    bridge.chunk("file.txt", emptyInput);

    TokenEntry token = new TokenEntry("image", "srcId", 0);

    assertThat(token.getIndex(), is(0));
    assertThat(token.getIdentifier(), is(1));
}
/**
 * Looks up the token that sits {@code offset} positions away from the given entry.
 *
 * @param offset displacement added to {@code m.getIndex()}; may be negative
 * @param m      entry whose index anchors the lookup
 * @return whatever {@code code.get} yields for position {@code offset + m.getIndex()}
 */
public TokenEntry tokenAt(int offset, TokenEntry m) {
    int position = offset + m.getIndex();
    return code.get(position);
}
/**
 * Records that {@code mark1} and {@code mark2} share {@code dupes} duplicated tokens.
 *
 * <p>Matches are grouped in {@code matchTree} by duplicate length, and within each
 * group they are keyed by mark index. If neither mark is known for this length a new
 * match is created; if exactly one is known, the other mark is attached to that
 * existing match; if both are known nothing changes.
 *
 * @param mark1 first mark of the pair
 * @param mark2 second mark of the pair
 * @param dupes number of duplicated tokens counted for the pair
 */
private void reportMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
    Map<Integer, Match> bucket = matchTree.get(dupes);
    if (bucket == null) {
        // First match of this length: open a bucket and seed it with the pair.
        bucket = new TreeMap<>();
        matchTree.put(dupes, bucket);
        addNewMatch(mark1, mark2, dupes, bucket);
        return;
    }

    Match knownForFirst = bucket.get(mark1.getIndex());
    Match knownForSecond = bucket.get(mark2.getIndex());

    if (knownForFirst == null) {
        if (knownForSecond == null) {
            addNewMatch(mark1, mark2, dupes, bucket);
        } else {
            // Only mark2 is already part of a match: attach mark1 to it.
            knownForSecond.addTokenEntry(mark1);
            bucket.put(mark1.getIndex(), knownForSecond);
        }
    } else if (knownForSecond == null) {
        // Only mark1 is already part of a match: attach mark2 to it.
        knownForFirst.addTokenEntry(mark2);
        bucket.put(mark2.getIndex(), knownForFirst);
    }
    // Both marks already have a match of this length: nothing to record.
}
public void collect(List<TokenEntry> marks) { // first get a pairwise collection of all maximal matches for (int i = 0; i < marks.size() - 1; i++) { TokenEntry mark1 = marks.get(i); for (int j = i + 1; j < marks.size(); j++) { TokenEntry mark2 = marks.get(j); int diff = mark1.getIndex() - mark2.getIndex(); if (-diff < ma.getMinimumTileSize()) { continue; } if (hasPreviousDupe(mark1, mark2)) { continue; } // "match too small" check int dupes = countDuplicateTokens(mark1, mark2); if (dupes < ma.getMinimumTileSize()) { continue; } // is it still too close together if (diff + dupes >= 1) { continue; } reportMatch(mark1, mark2, dupes); } } }
/**
 * Tells whether the tokens immediately before the two marks still match.
 *
 * @param mark1 first mark; when its index is 0 there is no preceding token and the
 *              result is {@code false}
 * @param mark2 second mark
 * @return {@code true} when {@code mark1} is not at index 0 and {@code matchEnded}
 *         is false for the two tokens one position before each mark
 */
private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
    return mark1.getIndex() != 0
            && !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
}
/**
 * Computes the index of the last token covered, taking mark 0 as the start.
 *
 * @return index of the token of mark 0, plus the token count, minus one
 */
public int getEndIndex() {
    int startIndex = getMark(0).getToken().getIndex();
    return startIndex + getTokenCount() - 1;
}