/**
 * Gives every test a fresh {@code PackedMemoryCloneIndex} built with the no-arg constructor.
 */
@Before
public void setUp() {
  index = new PackedMemoryCloneIndex();
}
/**
 * Sorts the index unless the {@code sorted} flag says it is already sorted.
 */
private void ensureSorted() {
  if (!sorted) {
    ensureCapacity();

    DataUtils.sort(byBlockHash);

    // start from the identity permutation before ordering entries by resource id
    int position = 0;
    while (position < size) {
      resourceIdsIndex[position] = position;
      position++;
    }
    DataUtils.sort(byResourceId);

    sorted = true;
  }
}
/** * {@inheritDoc} */ @Override public Collection<Block> getBySequenceHash(ByteArray sequenceHash) { ensureSorted(); // prepare hash for binary search int[] hash = sequenceHash.toIntArray(); if (hash.length != hashInts) { throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length); } int offset = size * blockInts; for (int i = 0; i < hashInts; i++) { blockData[offset++] = hash[i]; } int index = DataUtils.binarySearch(byBlockHash); List<Block> result = new ArrayList<>(); while (index < size && !isLessByHash(size, index)) { // extract block (note that there is no need to extract hash) String resourceId = resourceIds[index]; result.add(createBlock(index, resourceId, sequenceHash)); index++; } return result; }
/** * {@inheritDoc} * <p> * <strong>Note that this implementation does not guarantee that blocks would be sorted by index.</strong> * </p> */ @Override public Collection<Block> getByResourceId(String resourceId) { ensureSorted(); // prepare resourceId for binary search resourceIds[size] = resourceId; resourceIdsIndex[size] = size; int index = DataUtils.binarySearch(byResourceId); List<Block> result = new ArrayList<>(); int realIndex = resourceIdsIndex[index]; while (index < size && FastStringComparator.INSTANCE.compare(resourceIds[realIndex], resourceId) == 0) { result.add(getBlock(realIndex, resourceId)); index++; realIndex = resourceIdsIndex[index]; } return result; }
/**
 * Inserts eight blocks spread over the resource ids "a" to "e" and checks the number of
 * resources as well as the lookups by sequence hash and by resource id.
 */
@Test
public void test() {
  index.insert(newBlock("a", 1));
  index.insert(newBlock("a", 2));
  index.insert(newBlock("b", 1));
  index.insert(newBlock("c", 1));
  index.insert(newBlock("d", 1));
  index.insert(newBlock("e", 1));
  index.insert(newBlock("e", 2));
  index.insert(newBlock("e", 3));

  // five distinct resource ids were inserted
  assertThat(index.noResources()).isEqualTo(5);

  // lookups by hash
  assertThat(index.getBySequenceHash(new ByteArray(1L)).size(), is(5));
  assertThat(index.getBySequenceHash(new ByteArray(2L)).size(), is(2));
  assertThat(index.getBySequenceHash(new ByteArray(3L)).size(), is(1));
  assertThat(index.getBySequenceHash(new ByteArray(4L)).size(), is(0));

  // lookups by resource id, including one that was never inserted
  assertThat(index.getByResourceId("a").size(), is(2));
  assertThat(index.getByResourceId("b").size(), is(1));
  assertThat(index.getByResourceId("e").size(), is(3));
  assertThat(index.getByResourceId("does not exist").size(), is(0));
}
ensureSorted(); while (index < size && !isLessByHash(size, index)) {
/**
 * Counts the resources by walking all entries in {@code resourceIdsIndex} order and counting
 * every change of resource id; the computation is O(N).
 */
@Override
public int noResources() {
  ensureSorted();

  int distinct = 0;
  String previous = null;
  for (int pos = 0; pos < size; pos++) {
    String current = resourceIds[resourceIdsIndex[pos]];
    if (current == null || current.equals(previous)) {
      // null ids are ignored, repeated ids belong to the resource already counted
      continue;
    }
    distinct++;
    previous = current;
  }
  return distinct;
}
}
/**
 * Inserts blocks of three resources in mixed order and checks that the iterator yields one
 * entry per resource, ordered "a", "b", "c", each with the expected number of blocks.
 */
@Test
public void iterate() {
  index.insert(newBlock("a", 1));
  index.insert(newBlock("c", 1));
  index.insert(newBlock("b", 1));
  index.insert(newBlock("c", 2));
  index.insert(newBlock("a", 2));

  ArrayList<ResourceBlocks> collected = new ArrayList<>();
  for (Iterator<ResourceBlocks> cursor = index.iterator(); cursor.hasNext();) {
    collected.add(cursor.next());
  }

  assertThat(collected).hasSize(3);

  assertThat(collected.get(0).resourceId()).isEqualTo("a");
  assertThat(collected.get(1).resourceId()).isEqualTo("b");
  assertThat(collected.get(2).resourceId()).isEqualTo("c");

  assertThat(collected.get(0).blocks()).hasSize(2);
  assertThat(collected.get(1).blocks()).hasSize(1);
  assertThat(collected.get(2).blocks()).hasSize(2);
}
/**
 * When: the index is queried by a hash value.
 * Expected: every returned block carries the very same hash object that was used for the query.
 */
@Test
public void should_construct_blocks_with_normalized_hash() {
  index.insert(newBlock("a", 1));
  index.insert(newBlock("b", 1));
  index.insert(newBlock("c", 1));

  ByteArray queryHash = new ByteArray(1L);
  Collection<Block> found = index.getBySequenceHash(queryHash);

  assertThat(found.size(), is(3));
  for (Block candidate : found) {
    // identity, not equality: the hash must be the requested instance itself
    assertThat(candidate.getBlockHash(), sameInstance(queryHash));
  }
}
/**
 * Creates the block stored at the given position for the given resource, delegating to
 * {@code createBlock} with {@code null} as the hash argument.
 */
private Block getBlock(int index, String resourceId) {
  Block block = createBlock(index, resourceId, null);
  return block;
}
/**
 * {@inheritDoc}
 */
@Override
public Iterator<ResourceBlocks> iterator() {
  // sort first, then create the iterator
  ensureSorted();
  Iterator<ResourceBlocks> resources = new ResourceIterator();
  return resources;
}
/**
 * Detects duplications for a component and registers them.
 * <p>
 * Both block collections are loaded into a fresh {@code PackedMemoryCloneIndex}, the suffix tree
 * algorithm is run for the origin blocks, and the resulting clone groups are filtered with the
 * predicate obtained for the component's language key before being passed to
 * {@code addDuplications}.
 * </p>
 *
 * @param component the component whose duplications are computed
 * @param originBlocks the blocks handed to the detection algorithm as origin
 * @param duplicationBlocks additional blocks added to the index
 */
public void computeCpd(Component component, Collection<Block> originBlocks, Collection<Block> duplicationBlocks) {
  CloneIndex cloneIndex = new PackedMemoryCloneIndex();
  populateIndex(cloneIndex, originBlocks);
  populateIndex(cloneIndex, duplicationBlocks);

  List<CloneGroup> detected = SuffixTreeCloneDetectionAlgorithm.detect(cloneIndex, originBlocks);

  Iterable<CloneGroup> retained = from(detected)
    .filter(getNumberOfUnitsNotLessThan(
      component.getFileAttributes().getLanguageKey()));

  addDuplications(component, retained);
}
/**
 * {@inheritDoc}
 * <p>
 * <strong>Note that this implementation allows insertion of two blocks with same index for one resource.</strong>
 * </p>
 *
 * @throws IllegalArgumentException if the block hash does not consist of exactly {@code hashInts} ints;
 *   the index is not modified in that case
 */
@Override
public void insert(Block block) {
  // validate before touching any state, so that a rejected block neither resets the sorted flag
  // nor leaves its resource id behind in the next free slot
  int[] hash = block.getBlockHash().toIntArray();
  if (hash.length != hashInts) {
    throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length);
  }

  sorted = false;
  ensureCapacity();

  resourceIds[size] = block.getResourceId();

  // packed layout of one entry: hash ints, then index in file, start line, end line, start unit, end unit
  int offset = size * blockInts;
  System.arraycopy(hash, 0, blockData, offset, hashInts);
  offset += hashInts;
  blockData[offset++] = block.getIndexInFile();
  blockData[offset++] = block.getStartLine();
  blockData[offset++] = block.getEndLine();
  blockData[offset++] = block.getStartUnit();
  blockData[offset] = block.getEndUnit();

  size++;
}
/**
 * Given: an index that accepts blocks with a 4-byte hash.
 * Expected: an exception when a block with an 8-byte hash is inserted.
 */
@Test(expected = IllegalArgumentException.class)
public void attempt_to_insert_hash_of_incorrect_size() {
  CloneIndex fourByteHashIndex = new PackedMemoryCloneIndex(4, 1);

  fourByteHashIndex.insert(newBlock("a", 1));
}
/**
 * Sorts the index unless the {@code sorted} flag says it is already sorted.
 */
private void ensureSorted() {
  if (!sorted) {
    ensureCapacity();

    DataUtils.sort(byBlockHash);

    // start from the identity permutation before ordering entries by resource id
    int position = 0;
    while (position < size) {
      resourceIdsIndex[position] = position;
      position++;
    }
    DataUtils.sort(byResourceId);

    sorted = true;
  }
}
/**
 * Given: an index that accepts blocks with a 4-byte hash.
 * Expected: an exception when searching by an 8-byte hash.
 */
@Test(expected = IllegalArgumentException.class)
public void attempt_to_find_hash_of_incorrect_size() {
  CloneIndex fourByteHashIndex = new PackedMemoryCloneIndex(4, 1);
  ByteArray eightByteHash = new ByteArray(1L);

  fourByteHashIndex.getBySequenceHash(eightByteHash);
}
/**
 * {@inheritDoc}
 * <p>
 * <strong>Note that this implementation allows insertion of two blocks with same index for one resource.</strong>
 * </p>
 *
 * @throws IllegalArgumentException if the block hash does not consist of exactly {@code hashInts} ints;
 *   the index is not modified in that case
 */
@Override
public void insert(Block block) {
  // validate before touching any state, so that a rejected block neither resets the sorted flag
  // nor leaves its resource id behind in the next free slot
  int[] hash = block.getBlockHash().toIntArray();
  if (hash.length != hashInts) {
    throw new IllegalArgumentException("Expected " + hashInts + " ints in hash, but got " + hash.length);
  }

  sorted = false;
  ensureCapacity();

  resourceIds[size] = block.getResourceId();

  // packed layout of one entry: hash ints, then index in file, start line, end line, start unit, end unit
  int offset = size * blockInts;
  System.arraycopy(hash, 0, blockData, offset, hashInts);
  offset += hashInts;
  blockData[offset++] = block.getIndexInFile();
  blockData[offset++] = block.getStartLine();
  blockData[offset++] = block.getEndLine();
  blockData[offset++] = block.getStartUnit();
  blockData[offset] = block.getEndUnit();

  size++;
}
/**
 * Prepares a fresh clone index and a tokenizer bridge (Java tokenizer, second constructor
 * argument 10) before each test.
 */
@Before
public void setUp() {
  index = new PackedMemoryCloneIndex();

  JavaTokenizer tokenizer = new JavaTokenizer();
  bridge = new TokenizerBridge(tokenizer, 10);
}
/**
 * Given: an index with initial capacity 1.
 * Expected: both blocks can be found after inserting two blocks, i.e. size and capacity grew.
 */
@Test
public void should_increase_capacity() {
  CloneIndex smallIndex = new PackedMemoryCloneIndex(8, 1);

  smallIndex.insert(newBlock("a", 1));
  smallIndex.insert(newBlock("a", 2));

  int found = smallIndex.getByResourceId("a").size();
  assertThat(found, is(2));
}