// First reported block: lines 1-5, tokens 1-6.
assertThat(duplicationBlocks.get(0).getStartLine()).isEqualTo(1);
assertThat(duplicationBlocks.get(0).getEndLine()).isEqualTo(5);
assertThat(duplicationBlocks.get(0).getStartTokenIndex()).isEqualTo(1);
assertThat(duplicationBlocks.get(0).getEndTokenIndex()).isEqualTo(6);
assertThat(duplicationBlocks.get(0).getHash()).isNotEmpty();

// Second reported block: lines 2-6, tokens 3-7.
assertThat(duplicationBlocks.get(1).getStartLine()).isEqualTo(2);
assertThat(duplicationBlocks.get(1).getEndLine()).isEqualTo(6);
assertThat(duplicationBlocks.get(1).getStartTokenIndex()).isEqualTo(3);
assertThat(duplicationBlocks.get(1).getEndTokenIndex()).isEqualTo(7);
// Check the hash of THIS block; the check previously re-tested block 0, leaving this hash unverified.
assertThat(duplicationBlocks.get(1).getHash()).isNotEmpty();

// Third reported block: lines 3-7, tokens 4-8.
assertThat(duplicationBlocks.get(2).getStartLine()).isEqualTo(3);
assertThat(duplicationBlocks.get(2).getEndLine()).isEqualTo(7);
assertThat(duplicationBlocks.get(2).getStartTokenIndex()).isEqualTo(4);
assertThat(duplicationBlocks.get(2).getEndTokenIndex()).isEqualTo(8);
// Same correction as above: verify the third block's own hash.
assertThat(duplicationBlocks.get(2).getHash()).isNotEmpty();
/**
 * Indexes the CPD blocks of a file and, when cross-project duplication is enabled,
 * also writes them to the scanner report.
 *
 * @param inputFile the file the blocks belong to
 * @param blocks the blocks computed for that file; may be empty
 * @throws UnsupportedOperationException if cross-project duplication is enabled and the
 *         report writer already holds CPD text blocks for this file
 */
public void insert(InputFile inputFile, Collection<Block> blocks) {
  if (settings.isCrossProjectDuplicationEnabled()) {
    int id = ((DefaultInputFile) inputFile).scannerId();
    if (publisher.getWriter().hasComponentData(FileStructure.Domain.CPD_TEXT_BLOCKS, id)) {
      throw new UnsupportedOperationException("Trying to save CPD tokens twice for the same file is not supported: " + inputFile.absolutePath());
    }
    // One report message per block, each built from a fresh builder.
    publisher.getWriter().writeCpdTextBlocks(id, blocks.stream()
      .map(block -> ScannerReport.CpdTextBlock.newBuilder()
        .setStartLine(block.getStartLine())
        .setEndLine(block.getEndLine())
        .setStartTokenIndex(block.getStartUnit())
        .setEndTokenIndex(block.getEndUnit())
        .setHash(block.getBlockHash().toHexString())
        .build())
      .collect(Collectors.toList()));
  }

  if (blocks.isEmpty()) {
    LOG.debug("Not enough content in '{}' to have CPD blocks, it will not be part of the duplication detection", inputFile.relativePath());
  } else {
    for (Block block : blocks) {
      mem.insert(block);
    }
  }
  // The file is recorded as indexed whether or not it produced any block.
  indexedFiles.add(inputFile);
}
/**
 * Writes one CPD text block for component 1 and checks that the writer reports the data,
 * that the backing file exists, and that the first block read back carries the same values.
 */
@Test
public void write_duplication_blocks() {
  assertThat(underTest.hasComponentData(FileStructure.Domain.CPD_TEXT_BLOCKS, 1)).isFalse();

  ScannerReport.CpdTextBlock.Builder blockBuilder = ScannerReport.CpdTextBlock.newBuilder();
  blockBuilder.setHash("abcdefghijklmnop");
  blockBuilder.setStartLine(1);
  blockBuilder.setEndLine(2);
  blockBuilder.setStartTokenIndex(10);
  blockBuilder.setEndTokenIndex(15);
  ScannerReport.CpdTextBlock written = blockBuilder.build();
  underTest.writeCpdTextBlocks(1, asList(written));

  assertThat(underTest.hasComponentData(FileStructure.Domain.CPD_TEXT_BLOCKS, 1)).isTrue();
  File reportFile = underTest.getFileStructure().fileFor(FileStructure.Domain.CPD_TEXT_BLOCKS, 1);
  assertThat(reportFile).exists().isFile();

  // Read the stream back and compare the first element field by field.
  try (CloseableIterator<ScannerReport.CpdTextBlock> stored = Protobuf.readStream(reportFile, ScannerReport.CpdTextBlock.parser())) {
    ScannerReport.CpdTextBlock readBack = stored.next();
    assertThat(readBack.getHash()).isEqualTo("abcdefghijklmnop");
    assertThat(readBack.getStartLine()).isEqualTo(1);
    assertThat(readBack.getEndLine()).isEqualTo(2);
    assertThat(readBack.getStartTokenIndex()).isEqualTo(10);
    assertThat(readBack.getEndTokenIndex()).isEqualTo(15);
  }
}
/**
 * Registers the CPD blocks of a file in the in-memory index and, if cross-project
 * duplication is enabled, publishes them in the scanner report as well.
 *
 * @param inputFile the file the blocks belong to
 * @param blocks the blocks computed for that file; may be empty
 * @throws UnsupportedOperationException if cross-project duplication is enabled and the
 *         report writer already holds CPD text blocks for this file
 */
public void insert(InputFile inputFile, Collection<Block> blocks) {
  if (settings.isCrossProjectDuplicationEnabled()) {
    int id = ((DefaultInputFile) inputFile).scannerId();
    if (publisher.getWriter().hasComponentData(FileStructure.Domain.CPD_TEXT_BLOCKS, id)) {
      throw new UnsupportedOperationException("Trying to save CPD tokens twice for the same file is not supported: " + inputFile.absolutePath());
    }
    // Each block becomes one report message, built from its own builder.
    publisher.getWriter().writeCpdTextBlocks(id, blocks.stream()
      .map(block -> ScannerReport.CpdTextBlock.newBuilder()
        .setStartLine(block.getStartLine())
        .setEndLine(block.getEndLine())
        .setStartTokenIndex(block.getStartUnit())
        .setEndTokenIndex(block.getEndUnit())
        .setHash(block.getBlockHash().toHexString())
        .build())
      .collect(Collectors.toList()));
  }

  if (blocks.isEmpty()) {
    LOG.debug("Not enough content in '{}' to have CPD blocks, it will not be part of the duplication detection", inputFile.relativePath());
  } else {
    for (Block block : blocks) {
      mem.insert(block);
    }
  }
  // Files without blocks are still tracked as indexed.
  indexedFiles.add(inputFile);
}
/**
 * Inserts one {@code DuplicationUnitDto} per CPD text block that the report holds for the
 * given component, numbering the blocks from 0 in the order they are read.
 *
 * @param component the component whose report ref and uuid are used
 */
private void readFromReport(Component component) {
  int ref = component.getReportAttributes().getRef();
  try (CloseableIterator<ScannerReport.CpdTextBlock> blocks = reportReader.readCpdTextBlocks(ref)) {
    for (int indexInFile = 0; blocks.hasNext(); indexInFile++) {
      ScannerReport.CpdTextBlock block = blocks.next();
      DuplicationUnitDto unit = new DuplicationUnitDto()
        .setHash(block.getHash())
        .setStartLine(block.getStartLine())
        .setEndLine(block.getEndLine())
        .setIndexInFile(indexInFile)
        .setAnalysisUuid(analysisUuid)
        .setComponentUuid(component.getUuid());
      dbClient.duplicationDao().insert(session, unit);
    }
  }
}
/**
 * Builds a {@link Block} from a report CPD text block, using the current {@code fileKey}
 * and {@code indexInFile}, then advances {@code indexInFile} for the next call.
 *
 * @param duplicationBlock the report block supplying hash, line range and token range
 * @return the block built from those values
 */
@Override
public Block apply(@Nonnull CpdTextBlock duplicationBlock) {
  Block.Builder builder = Block.builder();
  builder.setResourceId(fileKey);
  builder.setBlockHash(new ByteArray(duplicationBlock.getHash()));
  builder.setIndexInFile(indexInFile);
  builder.setLines(duplicationBlock.getStartLine(), duplicationBlock.getEndLine());
  builder.setUnit(duplicationBlock.getStartTokenIndex(), duplicationBlock.getEndTokenIndex());
  Block converted = builder.build();
  // Increment only once the block has been built.
  indexInFile++;
  return converted;
}
}
/**
 * Extracts the hash of a CPD text block.
 *
 * @param duplicationBlock the block to read; declared {@code @Nonnull}
 * @return the value of {@code duplicationBlock.getHash()}
 */
@Override public String apply(@Nonnull CpdTextBlock duplicationBlock) { return duplicationBlock.getHash(); } }
/**
 * Opens the CPD text blocks stored for a component.
 *
 * @param componentRef the report ref of the component
 * @return an iterator over the stored blocks, or an empty iterator when
 *         {@code fileExists} reports no file for this component
 */
public CloseableIterator<ScannerReport.CpdTextBlock> readCpdTextBlocks(int componentRef) {
  File file = fileStructure.fileFor(FileStructure.Domain.CPD_TEXT_BLOCKS, componentRef);
  if (!fileExists(file)) {
    return emptyCloseableIterator();
  }
  return Protobuf.readStream(file, ScannerReport.CpdTextBlock.parser());
}
ScannerReport.CpdTextBlock originBlock1 = ScannerReport.CpdTextBlock.newBuilder() .setHash("a8998353e96320ec") .setStartLine(30) .setEndTokenIndex(10) .build(); ScannerReport.CpdTextBlock originBlock2 = ScannerReport.CpdTextBlock.newBuilder() .setHash("b1234353e96320ff") .setStartLine(10) .setHash(originBlock1.getHash()) .setStartLine(40) .setEndLine(55) .setHash(originBlock2.getHash()) .setStartLine(20) .setEndLine(35) new Block.Builder() .setResourceId(CURRENT_FILE_KEY) .setBlockHash(new ByteArray(originBlock1.getHash())) .setIndexInFile(0) .setLines(originBlock1.getStartLine(), originBlock1.getEndLine()) .setUnit(originBlock1.getStartTokenIndex(), originBlock1.getEndTokenIndex()) .build()); assertThat(originBlocksByIndex.get(1)).isEqualTo( new Block.Builder() .setResourceId(CURRENT_FILE_KEY) .setBlockHash(new ByteArray(originBlock2.getHash()))
dbSession.commit(); ScannerReport.CpdTextBlock originBlock = ScannerReport.CpdTextBlock.newBuilder() .setHash(hash) .setStartLine(30) .setBlockHash(new ByteArray(hash)) .setIndexInFile(0) .setLines(originBlock.getStartLine(), originBlock.getEndLine()) .setUnit(originBlock.getStartTokenIndex(), originBlock.getEndTokenIndex()) .build()), asList(
/**
 * With cross-project duplication disabled, executing the step must not touch
 * {@code integrateCrossProjectDuplications}, even though another project has an indexed
 * unit with the same hash as the block in the report.
 */
@Test
public void nothing_to_do_when_cross_project_duplication_is_disabled() {
  when(crossProjectDuplicationStatusHolder.isEnabled()).thenReturn(false);
  analysisMetadataHolder.setBaseAnalysis(baseProjectAnalysis);

  // Index a unit belonging to a file of another project.
  ComponentDto foreignProject = createProject("OTHER_PROJECT_KEY");
  SnapshotDto foreignSnapshot = createProjectSnapshot(foreignProject);
  ComponentDto foreignFile = createFile("OTHER_FILE_KEY", foreignProject);
  String sharedHash = "a8998353e96320ec";
  DuplicationUnitDto indexedUnit = new DuplicationUnitDto()
    .setHash(sharedHash)
    .setStartLine(40)
    .setEndLine(55)
    .setIndexInFile(0)
    .setAnalysisUuid(foreignSnapshot.getUuid())
    .setComponentUuid(foreignFile.uuid());
  dbClient.duplicationDao().insert(dbSession, indexedUnit);
  dbSession.commit();

  // Report a block of the current file with the same hash.
  ScannerReport.CpdTextBlock.Builder reported = ScannerReport.CpdTextBlock.newBuilder();
  reported.setHash(sharedHash);
  reported.setStartLine(30);
  reported.setEndLine(45);
  reported.setStartTokenIndex(0);
  reported.setEndTokenIndex(10);
  ScannerReport.CpdTextBlock originBlock = reported.build();
  batchReportReader.putDuplicationBlocks(FILE_REF, asList(originBlock));

  underTest.execute(new TestComputationStepContext());

  verifyZeroInteractions(integrateCrossProjectDuplications);
}
/**
 * Puts two CPD text blocks in the report for one file and checks that both end up in
 * {@code duplications_index} with consecutive file indexes and that two inserts are counted.
 */
@Test
public void persist_many_cpd_text_blocks() {
  when(crossProjectDuplicationStatusHolder.isEnabled()).thenReturn(true);
  ScannerReport.CpdTextBlock secondBlock = ScannerReport.CpdTextBlock.newBuilder()
    .setHash("b1234353e96320ff")
    .setStartLine(20)
    .setEndLine(15)
    .build();
  reportReader.putDuplicationBlocks(FILE_1_REF, Arrays.asList(CPD_TEXT_BLOCK, secondBlock));

  TestComputationStepContext context = new TestComputationStepContext();
  underTest.execute(context);

  String query = "select HASH, START_LINE, END_LINE, INDEX_IN_FILE, COMPONENT_UUID, ANALYSIS_UUID from duplications_index";
  List<Map<String, Object>> rows = dbTester.select(query);
  assertThat(rows).extracting("HASH").containsOnly(CPD_TEXT_BLOCK.getHash(), "b1234353e96320ff");
  assertThat(rows).extracting("START_LINE").containsOnly(30L, 20L);
  assertThat(rows).extracting("END_LINE").containsOnly(45L, 15L);
  assertThat(rows).extracting("INDEX_IN_FILE").containsOnly(0L, 1L);
  assertThat(rows).extracting("COMPONENT_UUID").containsOnly(FILE_1.getUuid());
  assertThat(rows).extracting("ANALYSIS_UUID").containsOnly(ANALYSIS_UUID);
  context.getStatistics().assertValue("inserts", 2);
}
/**
 * Writes a report with one component and one CPD text block, then checks that a reader
 * over the same directory returns exactly one block for that component.
 */
@Test
public void read_duplication_blocks() {
  ScannerReportWriter writer = new ScannerReportWriter(dir);
  ScannerReport.Metadata metadata = ScannerReport.Metadata.newBuilder()
    .setRootComponentRef(1)
    .build();
  writer.writeMetadata(metadata);
  ScannerReport.Component component = ScannerReport.Component.newBuilder()
    .setRef(1)
    .build();
  writer.writeComponent(component);

  ScannerReport.CpdTextBlock.Builder blockBuilder = ScannerReport.CpdTextBlock.newBuilder();
  blockBuilder.setHash("abcdefghijklmnop");
  blockBuilder.setStartLine(1);
  blockBuilder.setEndLine(2);
  blockBuilder.setStartTokenIndex(10);
  blockBuilder.setEndTokenIndex(15);
  writer.writeCpdTextBlocks(1, singletonList(blockBuilder.build()));

  ScannerReportReader sut = new ScannerReportReader(dir);
  assertThat(sut.readCpdTextBlocks(1)).hasSize(1);
}
/**
 * Inserts one {@code DuplicationUnitDto} per CPD text block that the report holds for the
 * given component, numbering the blocks from 0 in read order, and adds the number of
 * blocks handled to {@code count}.
 *
 * @param component the component whose report ref and uuid are used
 */
private void readFromReport(Component component) {
  int inserted = 0;
  int ref = component.getReportAttributes().getRef();
  try (CloseableIterator<ScannerReport.CpdTextBlock> blocks = reportReader.readCpdTextBlocks(ref)) {
    for (; blocks.hasNext(); inserted++) {
      ScannerReport.CpdTextBlock block = blocks.next();
      // The running total doubles as the block's position in the file.
      DuplicationUnitDto unit = new DuplicationUnitDto()
        .setHash(block.getHash())
        .setStartLine(block.getStartLine())
        .setEndLine(block.getEndLine())
        .setIndexInFile(inserted)
        .setAnalysisUuid(analysisUuid)
        .setComponentUuid(component.getUuid());
      dbClient.duplicationDao().insert(session, unit);
    }
  }
  count += inserted;
}
/**
 * Puts a single CPD text block in the report and checks the row written to
 * {@code duplications_index} as well as the "inserts" statistic.
 */
@Test
public void persist_cpd_text_block() {
  when(crossProjectDuplicationStatusHolder.isEnabled()).thenReturn(true);
  reportReader.putDuplicationBlocks(FILE_1_REF, singletonList(CPD_TEXT_BLOCK));

  TestComputationStepContext context = new TestComputationStepContext();
  underTest.execute(context);

  String query = "select HASH, START_LINE, END_LINE, INDEX_IN_FILE, COMPONENT_UUID, ANALYSIS_UUID from duplications_index";
  Map<String, Object> row = dbTester.selectFirst(query);
  assertThat(row.get("HASH")).isEqualTo(CPD_TEXT_BLOCK.getHash());
  assertThat(row.get("START_LINE")).isEqualTo(30L);
  assertThat(row.get("END_LINE")).isEqualTo(45L);
  assertThat(row.get("INDEX_IN_FILE")).isEqualTo(0L);
  assertThat(row.get("COMPONENT_UUID")).isEqualTo(FILE_1.getUuid());
  assertThat(row.get("ANALYSIS_UUID")).isEqualTo(ANALYSIS_UUID);
  context.getStatistics().assertValue("inserts", 1);
}
/**
 * With cross-project duplication enabled and a CPD text block in the report, but nothing
 * inserted into the duplication index by this test, executing the step must not touch
 * {@code integrateCrossProjectDuplications}.
 */
@Test
public void nothing_to_do_when_cpd_text_blocks_exists_but_no_duplicated_found() {
  when(crossProjectDuplicationStatusHolder.isEnabled()).thenReturn(true);
  analysisMetadataHolder.setBaseAnalysis(baseProjectAnalysis);

  ScannerReport.CpdTextBlock.Builder reported = ScannerReport.CpdTextBlock.newBuilder();
  reported.setHash("a8998353e96320ec");
  reported.setStartLine(30);
  reported.setEndLine(45);
  reported.setStartTokenIndex(0);
  reported.setEndTokenIndex(10);
  ScannerReport.CpdTextBlock originBlock = reported.build();
  batchReportReader.putDuplicationBlocks(FILE_REF, asList(originBlock));

  underTest.execute(new TestComputationStepContext());

  verifyZeroInteractions(integrateCrossProjectDuplications);
}
/**
 * Turns a report CPD text block into a {@link Block} for the current {@code fileKey},
 * stamping it with the current {@code indexInFile} and then incrementing that counter.
 *
 * @param duplicationBlock the report block supplying hash, line range and token range
 * @return the block built from those values
 */
@Override
public Block apply(@Nonnull CpdTextBlock duplicationBlock) {
  Block.Builder builder = Block.builder();
  builder.setResourceId(fileKey);
  builder.setBlockHash(new ByteArray(duplicationBlock.getHash()));
  builder.setIndexInFile(indexInFile);
  builder.setLines(duplicationBlock.getStartLine(), duplicationBlock.getEndLine());
  builder.setUnit(duplicationBlock.getStartTokenIndex(), duplicationBlock.getEndTokenIndex());
  Block converted = builder.build();
  // The counter moves on only after the block has been built.
  indexInFile++;
  return converted;
}
}
/**
 * Returns the CPD text blocks recorded for a component.
 *
 * @param componentRef the report ref of the component
 * @return an iterator over the blocks read from the component's file, or an empty
 *         iterator when {@code fileExists} reports no such file
 */
public CloseableIterator<ScannerReport.CpdTextBlock> readCpdTextBlocks(int componentRef) {
  File file = fileStructure.fileFor(FileStructure.Domain.CPD_TEXT_BLOCKS, componentRef);
  if (!fileExists(file)) {
    // Nothing was written for this component.
    return emptyCloseableIterator();
  }
  return Protobuf.readStream(file, ScannerReport.CpdTextBlock.parser());
}
/**
 * Maps a CPD text block to its hash.
 *
 * @param duplicationBlock the block to read; declared {@code @Nonnull}
 * @return the value of {@code duplicationBlock.getHash()}
 */
@Override public String apply(@Nonnull CpdTextBlock duplicationBlock) { return duplicationBlock.getHash(); } }