/**
 * Links a VCF file and verifies that its four samples are attached to the file and can be
 * fetched from the catalog by uid with the expected ids.
 */
@Test
public void testLinkFile() throws CatalogException, IOException, URISyntaxException {
    URI vcfUri = getClass().getResource("/biofiles/variant-test-file-dot-names.vcf.gz").toURI();
    QueryResult<File> linkResult = fileManager.link(studyFqn, vcfUri, ".", new ObjectMap(), sessionIdUser);
    File linkedFile = linkResult.first();
    assertEquals(4, linkedFile.getSamples().size());

    // Look the samples up again through the sample manager, using the uids stored in the file.
    List<Long> sampleUids = linkedFile.getSamples()
            .stream()
            .map(Sample::getUid)
            .collect(Collectors.toList());
    Query byUid = new Query(SampleDBAdaptor.QueryParams.UID.key(), sampleUids);
    QueryResult<Sample> catalogSamples = catalogManager.getSampleManager()
            .get(studyFqn, byUid, QueryOptions.empty(), sessionIdUser);
    assertEquals(4, catalogSamples.getNumResults());

    List<String> catalogSampleIds = catalogSamples.getResult()
            .stream()
            .map(Sample::getId)
            .collect(Collectors.toList());
    String[] expectedIds = {"test-name.bam", "NA19660", "NA19661", "NA19685"};
    for (String expectedId : expectedIds) {
        assertTrue(catalogSampleIds.contains(expectedId));
    }
}
/**
 * Runs the load step (transform disabled) of the variant index for the given files and
 * checks the outcome against the catalog.
 *
 * <p>The method mutates {@code queryOptions}: it forces TRANSFORM to {@code false}, LOAD to
 * {@code true} and sets the catalog output path to {@code outputId}.
 *
 * @param files               files to load; the study is resolved from the first element
 * @param expectedLoadedFiles files expected to be reported as loaded; the number of pipeline
 *                            results must match its size and all of their samples must belong
 *                            to the default cohort
 * @param queryOptions        indexing options; if CALCULATE_STATS is enabled the default
 *                            cohort stats are verified as well
 * @param outputId            catalog path passed as {@code StorageOperation.CATALOG_PATH}
 * @return the pipeline results returned by the variant manager
 * @throws Exception if indexing or any catalog call fails
 */
protected List<StoragePipelineResult> loadFiles(List<File> files, List<File> expectedLoadedFiles, QueryOptions queryOptions,
                                                String outputId) throws Exception {
    queryOptions.append(VariantFileIndexerStorageOperation.TRANSFORM, false);
    queryOptions.append(VariantFileIndexerStorageOperation.LOAD, true);
    queryOptions.append(StorageOperation.CATALOG_PATH, outputId);
    boolean calculateStats = queryOptions.getBoolean(VariantStorageEngine.Options.CALCULATE_STATS.key());

    String studyId = catalogManager.getFileManager().getStudy(files.get(0), sessionId).getId();
    List<String> fileIds = files.stream().map(File::getId).collect(Collectors.toList());
    String outdir = opencga.createTmpOutdir(studyId, "_LOAD_", sessionId);
    List<StoragePipelineResult> etlResults = variantManager.index(studyId, fileIds, outdir, queryOptions, sessionId);

    assertEquals(expectedLoadedFiles.size(), etlResults.size());
    checkEtlResults(studyId, etlResults, FileIndex.IndexStatus.READY);

    Cohort defaultCohort = getDefaultCohort(studyId);
    if (!expectedLoadedFiles.isEmpty()) {
        // The cohort content does not change inside the loop, so collect its sample uids only once.
        List<Long> samplesInCohort = defaultCohort.getSamples().stream().map(Sample::getUid).collect(Collectors.toList());
        for (File file : expectedLoadedFiles) {
            List<Long> samplesInFile = file.getSamples().stream().map(Sample::getUid).collect(Collectors.toList());
            assertTrue(samplesInCohort.containsAll(samplesInFile));
        }
    }

    if (calculateStats) {
        assertEquals(Cohort.CohortStatus.READY, defaultCohort.getStatus().getName());
        checkCalculatedStats(studyId, Collections.singletonMap(DEFAULT_COHORT, defaultCohort), catalogManager, dbName, sessionId);
    }
    return etlResults;
}
/**
 * Test fixture: creates the 1000g batch file, splits its samples into {@code coh.length}
 * consecutive equally sized cohorts named {@code coh0, coh1, ...}, indexes the file without
 * annotation and stores the id of the default cohort in {@code all}.
 */
public void before() throws Exception {
    File file = opencga.createFile(studyId,
            "1000g_batches/1-500.filtered.10k.chr22.phase3_shapeit2_mvncall_integrated_v5.20130502.genotypes.vcf.gz", sessionId);

    for (int i = 0; i < coh.length; i++) {
        // Integer division: samples beyond the last full slice are not assigned to any cohort.
        int from = file.getSamples().size() / coh.length * i;
        int to = file.getSamples().size() / coh.length * (i + 1);
        Cohort created = catalogManager.getCohortManager()
                .create(studyId, "coh" + i, Study.Type.CONTROL_SET, "", file.getSamples().subList(from, to), null, null, sessionId)
                .first();
        coh[i] = created.getId();
    }

    QueryOptions indexOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false);
    indexOptions.putIfNotNull(StorageOperation.CATALOG_PATH, outputId);
    variantManager.index(studyId, file.getId(), createTmpOutdir(file), indexOptions, sessionId);

    Query defaultCohortQuery = new Query(CohortDBAdaptor.QueryParams.ID.key(), DEFAULT_COHORT);
    all = catalogManager.getCohortManager()
            .get(studyId, defaultCohortQuery, new QueryOptions(), sessionId)
            .first()
            .getId();
}
/**
 * Links a VCF file, checks the detected samples, format and bioformat, then requests an
 * index job and verifies that the linked file is the first input of that job.
 */
@Test
public void testIndex() throws Exception {
    URI vcfUri = getClass().getResource("/biofiles/variant-test-file.vcf.gz").toURI();

    File linked = fileManager.link(studyFqn, vcfUri, "", null, sessionIdUser).first();
    assertEquals(4, linked.getSamples().size());
    assertEquals(File.Format.VCF, linked.getFormat());
    assertEquals(File.Bioformat.VARIANT, linked.getBioformat());

    // The file is referenced by name when asking for the index job.
    Job indexJob = fileManager
            .index(studyFqn, Collections.singletonList(linked.getName()), "VCF", null, sessionIdUser)
            .first();
    assertEquals(linked.getUid(), indexJob.getInput().get(0).getUid());
}
/**
 * Removes the given files from the variant storage and checks the resulting catalog state:
 * every file is reported as removed and ends up TRANSFORMED, the default cohort is INVALID
 * or NONE, and the cohort samples equal the samples of the files still indexed as READY.
 *
 * NOTE(review): {@code options} and {@code outputId} are not used here; a fresh
 * {@code QueryOptions} is passed to the variant manager instead. Confirm whether that is
 * intentional.
 *
 * @param files    files to remove; the study is resolved from the first element
 * @param options  currently unused
 * @param outputId currently unused
 * @throws Exception if the removal or any catalog call fails
 */
private void removeFile(List<File> files, QueryOptions options, String outputId) throws Exception {
    List<String> idsToRemove = files.stream()
            .map(File::getId)
            .collect(Collectors.toList());
    Study study = catalogManager.getFileManager().getStudy(files.get(0), sessionId);
    String studyId = study.getFqn();

    List<File> removedFiles = variantManager.removeFile(idsToRemove, studyId, sessionId, new QueryOptions());
    assertEquals(files.size(), removedFiles.size());

    Query defaultCohortQuery = new Query(CohortDBAdaptor.QueryParams.ID.key(), StudyEntry.DEFAULT_COHORT);
    Cohort all = catalogManager.getCohortManager().get(studyId, defaultCohortQuery, null, sessionId).first();
    assertThat(all.getStatus().getName(), anyOf(is(Cohort.CohortStatus.INVALID), is(Cohort.CohortStatus.NONE)));
    Set<Long> allSampleIds = all.getSamples()
            .stream()
            .map(Sample::getUid)
            .collect(Collectors.toSet());

    // Samples of every file that is still indexed.
    Query readyFilesQuery = new Query(FileDBAdaptor.QueryParams.INDEX_STATUS_NAME.key(), FileIndex.IndexStatus.READY);
    Set<Long> loadedSamples = catalogManager.getFileManager().get(studyId, readyFilesQuery, null, sessionId)
            .getResult()
            .stream()
            .flatMap(readyFile -> readyFile.getSamples().stream())
            .map(Sample::getUid)
            .collect(Collectors.toSet());
    assertEquals(loadedSamples, allSampleIds);

    removedFiles.forEach(removed ->
            assertEquals(FileIndex.IndexStatus.TRANSFORMED, removed.getIndex().getStatus().getName()));
}
/**
 * Creates and uploads a catalog file from a test resource under {@code data/vcfs/}.
 *
 * <p>When {@code indexed} is true the file index is set to READY, the file uid is recorded
 * in {@code indexedFiles} and the samples of the file are appended to the cohort
 * {@code cohortId}.
 *
 * @param resourceName name of the resource to register
 * @param indexed      whether the file has to be flagged as already indexed
 * @return the file as stored in the catalog after all the updates
 * @throws IOException      if the resource can not be read or uploaded
 * @throws CatalogException if any catalog operation fails
 */
public static File create(String resourceName, boolean indexed) throws IOException, CatalogException {
    URI resourceUri = getResourceUri(resourceName);
    File file = fileMetadataReader.create(studyId, resourceUri, "data/vcfs/", "", true, null, sessionId).first();
    catalogFileUtils.upload(resourceUri, file, null, sessionId, false, false, true, false, Long.MAX_VALUE);

    if (indexed) {
        FileIndex.IndexStatus readyStatus = new FileIndex.IndexStatus(FileIndex.IndexStatus.READY);
        FileIndex fileIndex = new FileIndex("user", "today", readyStatus, 1234, Collections.emptyMap());
        catalogManager.getFileManager().setFileIndex(studyId, file.getPath(), fileIndex, sessionId);
        indexedFiles.add((int) file.getUid());

        // Current cohort samples followed by the samples of the new file.
        List<String> cohortSampleIds = catalogManager.getCohortManager()
                .getSamples(studyId, cohortId, null, sessionId)
                .getResult()
                .stream()
                .map(Sample::getId)
                .collect(Collectors.toList());
        for (Sample fileSample : file.getSamples()) {
            cohortSampleIds.add(fileSample.getId());
        }
        ObjectMap cohortUpdate = new ObjectMap(CohortDBAdaptor.QueryParams.SAMPLES.key(), cohortSampleIds);
        catalogManager.getCohortManager().update(studyId, cohortId, cohortUpdate, true, null, sessionId);
    }

    // Read the file back so the returned object reflects the updates done above.
    return catalogManager.getFileManager().get(studyId, file.getId(), null, sessionId).first();
}
/**
 * Uploads a VCF file, extracts its metadata and checks status, format, bioformat, the
 * variant metadata attribute and the samples registered for the file.
 */
@Test
public void testGetMetadataFromVcf() throws CatalogException, FileNotFoundException {
    File uploaded = catalogManager.getFileManager()
            .upload(study.getFqn(), vcfFileUri, new File().setPath(folder.getPath() + VCF_FILE_NAME), false, false, sessionIdUser)
            .first();
    assertTrue(uploaded.getSize() > 0);

    File file = FileMetadataReader.get(catalogManager)
            .setMetadataInformation(uploaded, null, null, sessionIdUser, false);

    assertEquals(File.FileStatus.READY, file.getStatus().getName());
    assertEquals(File.Format.VCF, file.getFormat());
    assertEquals(File.Bioformat.VARIANT, file.getBioformat());
    assertNotNull(file.getAttributes().get(VARIANT_FILE_METADATA));
    assertEquals(4, file.getSamples().size());

    Object metadataSampleIds = ((Map<String, Object>) file.getAttributes().get(VARIANT_FILE_METADATA)).get("sampleIds");
    assertEquals(expectedSampleNames, metadataSampleIds);

    // The query result is not inspected; the call is kept so that a failing lookup still breaks the test.
    Query samplesOfFile = new Query(SampleDBAdaptor.QueryParams.ID.key(),
            file.getSamples().stream().map(Sample::getId).collect(Collectors.toList()));
    catalogManager.getSampleManager().get(study.getFqn(), samplesOfFile, new QueryOptions(), sessionIdUser).getResult();

    assertTrue(expectedSampleNames.containsAll(
            file.getSamples().stream().map(Sample::getId).collect(Collectors.toSet())));
}
/**
 * Converts a file into its storage document.
 *
 * <p>On top of the generic conversion, the annotation sets entry is removed and the uid,
 * study uid, job reference, converted samples and the reversed file name are written.
 *
 * @param file            file to convert
 * @param variableSetList variable sets forwarded to the parent converter
 * @return the document to be stored
 */
@Override
public Document convertToStorageType(File file, List<VariableSet> variableSetList) {
    Document document = super.convertToStorageType(file, variableSetList);
    document.remove(SampleDBAdaptor.QueryParams.ANNOTATION_SETS.key());

    document.put("uid", file.getUid());
    document.put("studyUid", file.getStudyUid());

    // A missing job, or a job whose uid is 0, is stored as -1.
    long jobUid = -1L;
    if (file.getJob() != null && file.getJob().getUid() != 0) {
        jobUid = file.getJob().getUid();
    }
    document.put("job", new Document("uid", jobUid));

    document.put("samples", convertSamples(file.getSamples()));
    document.put(FileMongoDBAdaptor.REVERSE_NAME, StringUtils.reverse(file.getName()));
    return document;
}
@Test public void testIterator() throws CatalogException, SolrServerException, IOException { MongoDBAdaptorFactory factory = new MongoDBAdaptorFactory(catalogManager.getConfiguration()); FileMongoDBAdaptor fileMongoDBAdaptor = factory.getCatalogFileDBAdaptor(); QueryOptions queryOptions = new QueryOptions(); queryOptions.add(QueryOptions.INCLUDE, Arrays.asList(FileDBAdaptor.QueryParams.ID.key(), FileDBAdaptor.QueryParams.SAMPLE_UIDS.key())); //queryOptions.add("nativeQuery", true); DBIterator<File> fileDBIterator = fileMongoDBAdaptor.iterator(new Query("uid", 1000000154L), queryOptions); boolean found = false; while (fileDBIterator.hasNext()) { if (fileDBIterator.next().getSamples().size() > 0) { System.out.println("found"); found = true; } } if (!found) { System.out.println("nothing"); } }
@Test public void testAddSamples() throws Exception { long studyUid = user3.getProjects().get(0).getStudies().get(0).getUid(); Sample sample1 = catalogDBAdaptor.getCatalogSampleDBAdaptor().insert(studyUid, new Sample().setId("sample1").setStatus(new Status()), QueryOptions.empty()).first(); Sample sample2 = catalogDBAdaptor.getCatalogSampleDBAdaptor().insert(studyUid, new Sample().setId("sample2").setStatus(new Status()), QueryOptions.empty()).first(); File file = user3.getProjects().get(0).getStudies().get(0).getFiles().get(0); catalogFileDBAdaptor.addSamplesToFile(file.getUid(), Arrays.asList(sample1, sample2)); QueryResult<File> fileQueryResult = catalogFileDBAdaptor.get(file.getUid(), QueryOptions.empty()); assertEquals(2, fileQueryResult.first().getSamples().size()); assertTrue(Arrays.asList(sample1.getUid(), sample2.getUid()).containsAll( fileQueryResult.first().getSamples().stream().map(Sample::getUid).collect(Collectors.toList()))); Sample sample3 = catalogDBAdaptor.getCatalogSampleDBAdaptor().insert(studyUid, new Sample().setId("sample3").setStatus(new Status()), QueryOptions.empty()).first(); // Test we avoid duplicities catalogFileDBAdaptor.addSamplesToFile(file.getUid(), Arrays.asList(sample1, sample2, sample2, sample3)); fileQueryResult = catalogFileDBAdaptor.get(file.getUid(), QueryOptions.empty()); assertEquals(3, fileQueryResult.first().getSamples().size()); assertTrue(Arrays.asList(sample1.getUid(), sample2.getUid(), sample3.getUid()).containsAll( fileQueryResult.first().getSamples().stream().map(Sample::getUid).collect(Collectors.toList()))); }
.map(Sample::getUid) .collect(Collectors.toList()); samples.add(files.get(0).getSamples().get(0).getUid());
private void checkStudyConfiguration(Study study, StudyConfiguration studyConfiguration) throws CatalogException { assertEquals("user@p1:s1", studyConfiguration.getStudyName()); assertEquals(study.getUid(), studyConfiguration.getStudyId()); assertTrue(studyConfiguration.getInvalidStats().isEmpty()); for (Map.Entry<String, Integer> entry : studyConfiguration.getFileIds().entrySet()) { File file = catalogManager.getFileManager().get(studyConfiguration.getStudyName(), studyConfiguration.getFileIds().inverse().get(entry.getValue()), null, sessionId).first(); assertEquals(file.getName(), entry.getKey()); int id = (int) file.getUid(); assertEquals(file.getSamples().stream().map(Sample::getUid).map(Long::intValue).collect(Collectors.toSet()), studyConfiguration.getSamplesInFiles().get((id))); if (file.getIndex() == null || file.getIndex().getStatus() == null || file.getIndex().getStatus().getName() == null || !file.getIndex().getStatus().getName().equals(FileIndex.IndexStatus.READY)) { assertFalse(studyConfiguration.getIndexedFiles().contains(id)); // assertFalse("Should not contain header for file " + file.getId(), studyConfiguration.getHeaders().containsKey(id)); } // else { // assertTrue(studyConfiguration.getIndexedFiles().contains(id)); // assertTrue("Missing header for file " + file.getId(), studyConfiguration.getHeaders().containsKey(id)); // assertTrue("Missing header for file " + file.getId(), !studyConfiguration.getHeaders().get(id).isEmpty()); // } } }
/**
 * Indexes a file and then tries to delete one of its samples: the sample is matched but the
 * deletion is reported as failed because the sample is associated to files.
 */
@Test
public void testDeleteSampleFromIndexedFile() throws Exception {
    QueryOptions indexOptions = new QueryOptions(VariantStorageEngine.Options.ANNOTATE.key(), false)
            .append(VariantStorageEngine.Options.CALCULATE_STATS.key(), false);
    File indexedFile = getFile(0);
    indexFile(indexedFile, indexOptions, outputId);

    Query sampleQuery = new Query(SampleDBAdaptor.QueryParams.ID.key(), indexedFile.getSamples().get(100).getId());
    WriteResult deleteResult = catalogManager.getSampleManager().delete(studyFqn, sampleQuery, null, sessionId);

    assertEquals(1, deleteResult.getNumMatches());
    assertTrue(deleteResult.getFailed().get(0).getMessage().contains("Sample associated to the files"));
}
/**
 * Adds three samples to a file, extracts two of them and checks that only the remaining
 * sample is still associated to the file.
 */
@Test
public void testRemoveSamples() throws Exception {
    long studyUid = user3.getProjects().get(0).getStudies().get(0).getUid();
    Sample sample1 = catalogDBAdaptor.getCatalogSampleDBAdaptor().insert(studyUid, new Sample().setId("sample1").setStatus(new
            Status()), QueryOptions.empty()).first();
    Sample sample2 = catalogDBAdaptor.getCatalogSampleDBAdaptor().insert(studyUid, new Sample().setId("sample2").setStatus(new
            Status()), QueryOptions.empty()).first();
    Sample sample3 = catalogDBAdaptor.getCatalogSampleDBAdaptor().insert(studyUid, new Sample().setId("sample3").setStatus(new
            Status()), QueryOptions.empty()).first();
    File file = user3.getProjects().get(0).getStudies().get(0).getFiles().get(0);

    catalogFileDBAdaptor.addSamplesToFile(file.getUid(), Arrays.asList(sample1, sample2, sample3));
    QueryResult<File> fileQueryResult = catalogFileDBAdaptor.get(file.getUid(), QueryOptions.empty());
    assertEquals(3, fileQueryResult.first().getSamples().size());
    assertTrue(Arrays.asList(sample1.getUid(), sample2.getUid(), sample3.getUid())
            .containsAll(fileQueryResult.first().getSamples().stream().map(Sample::getUid).collect(Collectors.toList())));

    // Drop sample1 and sample3 from the file; sample2 must be the only one left.
    catalogFileDBAdaptor.extractSampleFromFiles(new Query(FileDBAdaptor.QueryParams.UID.key(), file.getUid()),
            Arrays.asList(sample1.getUid(), sample3.getUid()));
    fileQueryResult = catalogFileDBAdaptor.get(file.getUid(), QueryOptions.empty());
    assertEquals(1, fileQueryResult.first().getSamples().size());
    // assertEquals reports both uids on failure and compares by value, unlike assertTrue(a == b).
    assertEquals(sample2.getUid(), fileQueryResult.first().getSamples().get(0).getUid());
}
List<Long> samplesInCohort = defaultCohort.getSamples().stream().map(Sample::getUid).collect(Collectors.toList()); for (File file : expectedLoadedFiles) { Long[] samplesInFiles = file.getSamples().stream().map(Sample::getUid).collect(Collectors.toList()).toArray(new Long[0]);
/**
 * Creates a file entry from a VCF (left in STAGE status), uploads the content and checks
 * that the stored file becomes READY while keeping format, bioformat, variant metadata,
 * samples and size.
 */
@Test
public void testCreate() throws CatalogException {
    File staged = FileMetadataReader.get(catalogManager)
            .create(study.getFqn(), vcfFileUri, folder.getPath() + VCF_FILE_NAME, "", false, null, sessionIdUser)
            .first();

    assertEquals(File.FileStatus.STAGE, staged.getStatus().getName());
    assertEquals(File.Format.VCF, staged.getFormat());
    assertEquals(File.Bioformat.VARIANT, staged.getBioformat());
    assertNotNull(staged.getAttributes().get(VARIANT_FILE_METADATA));
    assertEquals(4, staged.getSamples().size());
    assertEquals(21499, staged.getSize());

    new FileUtils(catalogManager).upload(vcfFileUri, staged, null, sessionIdUser, false, false, true, true, Integer.MAX_VALUE);

    // Read the file back from the catalog to validate what was actually stored.
    File ready = catalogManager.getFileManager().get(study.getFqn(), staged.getPath(), null, sessionIdUser).first();

    assertEquals(File.FileStatus.READY, ready.getStatus().getName());
    assertEquals(File.Format.VCF, ready.getFormat());
    assertEquals(File.Bioformat.VARIANT, ready.getBioformat());
    assertNotNull(ready.getAttributes().get(VARIANT_FILE_METADATA));

    Map<?, ?> variantMetadata = (Map<?, ?>) ready.getAttributes().get(VARIANT_FILE_METADATA);
    assertNotNull(variantMetadata.get("sampleIds"));
    assertEquals(4, ((List<?>) variantMetadata.get("sampleIds")).size());
    assertNotNull(variantMetadata.get("header"));

    assertEquals(4, ready.getSamples().size());
    assertEquals(21499, ready.getSize());
}
@Test public void testGetMetadataFromBam() throws CatalogException, FileNotFoundException { File file = catalogManager.getFileManager().upload(study.getFqn(), bamFileUri, new File().setPath(folder.getPath() + BAM_FILE_NAME), false, false, sessionIdUser).first(); assertTrue(file.getSize() > 0); file = FileMetadataReader.get(catalogManager). setMetadataInformation(file, null, null, sessionIdUser, false); assertEquals(File.FileStatus.READY, file.getStatus().getName()); // assertEquals(File.Format.GZIP, file.getFormat()); assertEquals(File.Bioformat.ALIGNMENT, file.getBioformat()); assertNotNull(file.getAttributes().get("alignmentHeader")); assertEquals(1, file.getSamples().size()); assertEquals("HG00096", catalogManager.getSampleManager().get(study.getFqn(), file.getSamples().get(0).getId(), null, sessionIdUser).first().getId()); }
@Test public void testDoNotOverwriteSampleIds() throws CatalogException, FileNotFoundException { File file = catalogManager.getFileManager().upload(study.getFqn(), vcfFileUri, new File().setPath(folder.getPath() + VCF_FILE_NAME), false, false, sessionIdUser).first(); assertEquals(File.FileStatus.READY, file.getStatus().getName()); assertEquals(File.Format.VCF, file.getFormat()); assertEquals(File.Bioformat.VARIANT, file.getBioformat()); assertNotNull(file.getAttributes().get(VARIANT_FILE_METADATA)); assertEquals(4, file.getSamples().size()); //Add a sampleId String sampleId = catalogManager.getSampleManager().create(study.getFqn(), new Sample().setId("Bad_Sample"), null, sessionIdUser) .first().getId(); catalogManager.getFileManager().update(study.getFqn(), file.getPath(), new ObjectMap(FileDBAdaptor.QueryParams.SAMPLES.key(), Collections.singletonList(sampleId)), new QueryOptions(), sessionIdUser); file = catalogManager.getFileManager().get(study.getFqn(), file.getPath(), null, sessionIdUser).first(); assertEquals(5, file.getSamples().size()); assertEquals(sampleId, file.getSamples().get(4).getId()); }
// Compare the Solr model with the source file: size, software name and number of samples.
// NOTE(review): these use the Java `assert` keyword, which is skipped unless the JVM runs
// with assertions enabled (-ea) - confirm the test runner enables them.
assert (fileSolrModel.getSize() == file.getSize()); assert (fileSolrModel.getSoftware().equals(file.getSoftware().getName())); assert (fileSolrModel.getNumSamples() == file.getSamples().size());
sessionIdUser); assertEquals(1, fileQueryResult.getNumResults()); assertEquals(0, fileQueryResult.first().getSamples().size()); sessionIdUser); assertEquals(1, fileQueryResult.getNumResults()); assertEquals(2, fileQueryResult.first().getSamples().size()); for (Sample sample : fileQueryResult.first().getSamples()) { assertTrue(sample.getUid() > 0); assertTrue(org.apache.commons.lang3.StringUtils.isEmpty(sample.getId())); sessionIdUser); assertEquals(1, fileQueryResult.getNumResults()); assertEquals(2, fileQueryResult.first().getSamples().size()); for (Sample sample : fileQueryResult.first().getSamples()) { if (sample.getId().equals(sample1.getId())) { assertEquals(2, sample.getVersion());