/**
 * Records the CPD (copy/paste detection) blocks computed for a file.
 * <p>
 * When cross-project duplication is enabled, the blocks are also serialized into the
 * scanner report so the server can match them against other projects. In all cases the
 * blocks are fed to the in-memory duplication index and the file is marked as indexed.
 *
 * @param inputFile the analyzed file
 * @param blocks    CPD blocks computed for that file; may be empty
 * @throws UnsupportedOperationException if report data was already written for this file
 */
public void insert(InputFile inputFile, Collection<Block> blocks) {
  if (settings.isCrossProjectDuplicationEnabled()) {
    int id = ((DefaultInputFile) inputFile).scannerId();
    // Writing twice would silently overwrite report data, so fail fast instead.
    if (publisher.getWriter().hasComponentData(FileStructure.Domain.CPD_TEXT_BLOCKS, id)) {
      throw new UnsupportedOperationException(
        "Trying to save CPD tokens twice for the same file is not supported: " + inputFile.absolutePath());
    }
    // A fresh builder per block; equivalent to reusing one cleared builder.
    publisher.getWriter().writeCpdTextBlocks(id, blocks.stream()
      .map(b -> ScannerReport.CpdTextBlock.newBuilder()
        .setStartLine(b.getStartLine())
        .setEndLine(b.getEndLine())
        .setStartTokenIndex(b.getStartUnit())
        .setEndTokenIndex(b.getEndUnit())
        .setHash(b.getBlockHash().toHexString())
        .build())
      .collect(Collectors.toList()));
  }
  blocks.forEach(mem::insert);
  if (blocks.isEmpty()) {
    LOG.debug("Not enough content in '{}' to have CPD blocks, it will not be part of the duplication detection", inputFile.relativePath());
  }
  indexedFiles.add(inputFile);
}
@Test
public void write_duplication_blocks() {
  // No data exists for component 1 before anything is written.
  assertThat(underTest.hasComponentData(FileStructure.Domain.CPD_TEXT_BLOCKS, 1)).isFalse();

  ScannerReport.CpdTextBlock written = ScannerReport.CpdTextBlock.newBuilder()
    .setHash("abcdefghijklmnop")
    .setStartLine(1)
    .setEndLine(2)
    .setStartTokenIndex(10)
    .setEndTokenIndex(15)
    .build();
  underTest.writeCpdTextBlocks(1, asList(written));

  // The write must be visible both through the API and on disk.
  assertThat(underTest.hasComponentData(FileStructure.Domain.CPD_TEXT_BLOCKS, 1)).isTrue();
  File file = underTest.getFileStructure().fileFor(FileStructure.Domain.CPD_TEXT_BLOCKS, 1);
  assertThat(file).exists().isFile();

  // Reading the protobuf stream back must yield exactly the fields we wrote.
  try (CloseableIterator<ScannerReport.CpdTextBlock> blocks = Protobuf.readStream(file, ScannerReport.CpdTextBlock.parser())) {
    ScannerReport.CpdTextBlock read = blocks.next();
    assertThat(read.getHash()).isEqualTo("abcdefghijklmnop");
    assertThat(read.getStartLine()).isEqualTo(1);
    assertThat(read.getEndLine()).isEqualTo(2);
    assertThat(read.getStartTokenIndex()).isEqualTo(10);
    assertThat(read.getEndTokenIndex()).isEqualTo(15);
  }
}
@Test
public void nothing_to_do_when_cpd_text_blocks_exists_but_no_duplicated_found() {
  // Cross-project duplication is on and a base analysis exists...
  when(crossProjectDuplicationStatusHolder.isEnabled()).thenReturn(true);
  analysisMetadataHolder.setBaseAnalysis(baseProjectAnalysis);

  // ...and the report contains one CPD block for the file.
  ScannerReport.CpdTextBlock block = ScannerReport.CpdTextBlock.newBuilder()
    .setHash("a8998353e96320ec")
    .setStartLine(30)
    .setEndLine(45)
    .setStartTokenIndex(0)
    .setEndTokenIndex(10)
    .build();
  batchReportReader.putDuplicationBlocks(FILE_REF, asList(block));

  underTest.execute(new TestComputationStepContext());

  // No matching block in any other project, so integration must never be invoked.
  verifyZeroInteractions(integrateCrossProjectDuplications);
}
/**
 * Records the CPD (copy/paste detection) blocks computed for a file.
 * <p>
 * When cross-project duplication is enabled, the blocks are also serialized into the
 * scanner report. In all cases the blocks are inserted into the in-memory duplication
 * index and the file is added to the set of indexed files (even when {@code blocks} is empty).
 *
 * @param inputFile the analyzed file
 * @param blocks    CPD blocks computed for that file; may be empty
 * @throws UnsupportedOperationException if CPD report data was already written for this file
 */
public void insert(InputFile inputFile, Collection<Block> blocks) {
  if (settings.isCrossProjectDuplicationEnabled()) {
    int id = ((DefaultInputFile) inputFile).scannerId();
    // Fail fast rather than silently overwriting previously written report data.
    if (publisher.getWriter().hasComponentData(FileStructure.Domain.CPD_TEXT_BLOCKS, id)) {
      throw new UnsupportedOperationException("Trying to save CPD tokens twice for the same file is not supported: " + inputFile.absolutePath());
    }
    // One builder is reused across the stream; clear() resets it for each block.
    // Safe here because the stream is sequential.
    final ScannerReport.CpdTextBlock.Builder builder = ScannerReport.CpdTextBlock.newBuilder();
    publisher.getWriter().writeCpdTextBlocks(id, blocks.stream().map(block -> {
      builder.clear();
      builder.setStartLine(block.getStartLine());
      builder.setEndLine(block.getEndLine());
      builder.setStartTokenIndex(block.getStartUnit());
      builder.setEndTokenIndex(block.getEndUnit());
      builder.setHash(block.getBlockHash().toHexString());
      return builder.build();
    }).collect(Collectors.toList()));
  }
  // Always feed the in-memory index, regardless of the cross-project setting.
  for (Block block : blocks) {
    mem.insert(block);
  }
  if (blocks.isEmpty()) {
    LOG.debug("Not enough content in '{}' to have CPD blocks, it will not be part of the duplication detection", inputFile.relativePath());
  }
  indexedFiles.add(inputFile);
}
ScannerReport.CpdTextBlock originBlock1 = ScannerReport.CpdTextBlock.newBuilder() .setHash("a8998353e96320ec") .setStartLine(30) .setEndTokenIndex(10) .build(); ScannerReport.CpdTextBlock originBlock2 = ScannerReport.CpdTextBlock.newBuilder() .setHash("b1234353e96320ff") .setStartLine(10)
dbSession.commit(); ScannerReport.CpdTextBlock originBlock = ScannerReport.CpdTextBlock.newBuilder() .setHash(hash) .setStartLine(30)
@Test
public void nothing_to_do_when_cross_project_duplication_is_disabled() {
  // Cross-project duplication is explicitly disabled.
  when(crossProjectDuplicationStatusHolder.isEnabled()).thenReturn(false);
  analysisMetadataHolder.setBaseAnalysis(baseProjectAnalysis);

  // Persist a duplicate unit in another project that WOULD match by hash,
  // to prove the step ignores it when the feature is off.
  ComponentDto otherProject = createProject("OTHER_PROJECT_KEY");
  SnapshotDto otherProjectSnapshot = createProjectSnapshot(otherProject);
  // Fixed local-variable typo: was "otherFIle".
  ComponentDto otherFile = createFile("OTHER_FILE_KEY", otherProject);
  String hash = "a8998353e96320ec";
  DuplicationUnitDto duplicate = new DuplicationUnitDto()
    .setHash(hash)
    .setStartLine(40)
    .setEndLine(55)
    .setIndexInFile(0)
    .setAnalysisUuid(otherProjectSnapshot.getUuid())
    .setComponentUuid(otherFile.uuid());
  dbClient.duplicationDao().insert(dbSession, duplicate);
  dbSession.commit();

  // The analyzed file reports a CPD block with the same hash.
  ScannerReport.CpdTextBlock originBlock = ScannerReport.CpdTextBlock.newBuilder()
    .setHash(hash)
    .setStartLine(30)
    .setEndLine(45)
    .setStartTokenIndex(0)
    .setEndTokenIndex(10)
    .build();
  batchReportReader.putDuplicationBlocks(FILE_REF, asList(originBlock));

  underTest.execute(new TestComputationStepContext());

  // Feature disabled: integration must never be touched despite the matching hash.
  verifyZeroInteractions(integrateCrossProjectDuplications);
}
/**
 * Verifies that several CPD text blocks for one file are each persisted as a row in
 * DUPLICATIONS_INDEX, with INDEX_IN_FILE reflecting their order in the report.
 */
@Test
public void persist_many_cpd_text_blocks() {
  when(crossProjectDuplicationStatusHolder.isEnabled()).thenReturn(true);
  reportReader.putDuplicationBlocks(FILE_1_REF, Arrays.asList(
    CPD_TEXT_BLOCK,
    // NOTE(review): end line (15) is before start line (20); the assertions below expect
    // these exact values, so this appears deliberate — confirm the step persists blocks
    // without validating line ordering.
    ScannerReport.CpdTextBlock.newBuilder()
      .setHash("b1234353e96320ff")
      .setStartLine(20)
      .setEndLine(15)
      .build()));
  TestComputationStepContext context = new TestComputationStepContext();

  underTest.execute(context);

  // Both blocks must land in the table, attached to the same component and analysis.
  List<Map<String, Object>> dtos = dbTester.select("select HASH, START_LINE, END_LINE, INDEX_IN_FILE, COMPONENT_UUID, ANALYSIS_UUID from duplications_index");
  assertThat(dtos).extracting("HASH").containsOnly(CPD_TEXT_BLOCK.getHash(), "b1234353e96320ff");
  assertThat(dtos).extracting("START_LINE").containsOnly(30L, 20L);
  assertThat(dtos).extracting("END_LINE").containsOnly(45L, 15L);
  assertThat(dtos).extracting("INDEX_IN_FILE").containsOnly(0L, 1L);
  assertThat(dtos).extracting("COMPONENT_UUID").containsOnly(FILE_1.getUuid());
  assertThat(dtos).extracting("ANALYSIS_UUID").containsOnly(ANALYSIS_UUID);
  // The step reports the number of inserted rows as a statistic.
  context.getStatistics().assertValue("inserts", 2);
}
@Test
public void read_duplication_blocks() {
  // Produce a minimal report containing one CPD block for component 1.
  ScannerReportWriter writer = new ScannerReportWriter(dir);
  writer.writeMetadata(ScannerReport.Metadata.newBuilder()
    .setRootComponentRef(1).build());
  writer.writeComponent(ScannerReport.Component.newBuilder()
    .setRef(1).build());
  ScannerReport.CpdTextBlock block = ScannerReport.CpdTextBlock.newBuilder()
    .setHash("abcdefghijklmnop")
    .setStartLine(1)
    .setEndLine(2)
    .setStartTokenIndex(10)
    .setEndTokenIndex(15)
    .build();
  writer.writeCpdTextBlocks(1, singletonList(block));

  // A reader over the same directory must surface exactly one block.
  ScannerReportReader reader = new ScannerReportReader(dir);
  assertThat(reader.readCpdTextBlocks(1)).hasSize(1);
}