protected void setSamplesPosition(Map<String, Integer> samplesPosition, boolean checkSorted) { if (samplesPosition == null) { this.samplesPosition = null; return; } if (samplesPosition instanceof LinkedHashMap) { if (!checkSorted || isSamplesPositionMapSorted((LinkedHashMap<String, Integer>) samplesPosition)) { this.samplesPosition = ((LinkedHashMap<String, Integer>) samplesPosition); } else { this.samplesPosition = sortSamplesPositionMap(samplesPosition); } } else { //Sort samples position this.samplesPosition = sortSamplesPositionMap(samplesPosition); } if (getSamplesData() == null || getSamplesData().isEmpty()) { for (int size = samplesPosition.size(); size > 0; size--) { getSamplesData().add(null); } } }
/**
 * Writes {@code value} into the format slot {@code formatIdx} of the sample stored at
 * {@code samplePosition}.
 *
 * <p>If the sample has no data list yet, a new one is created and stored. When the slot
 * already exists it is overwritten; otherwise the list is padded with {@code defaultValue}
 * up to {@code formatIdx} and {@code value} is appended. All list mutations go through
 * {@code actOnList} (defined elsewhere), which is also handed a callback that stores a
 * list back at {@code samplePosition}.
 *
 * @param samplePosition index of the sample in the samples data list
 * @param formatIdx      index of the format field within the sample's data
 * @param value          value to store
 * @param defaultValue   filler used for any missing slots before {@code formatIdx}
 * @return this instance
 * @throws IndexOutOfBoundsException if {@code samplePosition} or {@code formatIdx} is {@code null}
 */
public StudyEntry addSampleData(Integer samplePosition, Integer formatIdx, String value, String defaultValue) {
    if (formatIdx == null || samplePosition == null) {
        throw new IndexOutOfBoundsException();
    }

    Consumer<List<String>> writeBack = replacement -> getSamplesData().set(samplePosition, replacement);

    List<String> fields = getSamplesData().get(samplePosition);
    if (fields == null) {
        fields = new ArrayList<>(getFormat().size());
        getSamplesData().set(samplePosition, fields);
    }

    if (formatIdx < fields.size()) {
        // Slot already present: overwrite in place.
        actOnList(fields, l -> l.set(formatIdx, value), writeBack);
        return this;
    }

    // Pad the gap (if any) before appending the actual value.
    while (formatIdx > fields.size()) {
        fields = actOnList(fields, l -> l.add(defaultValue), writeBack);
    }
    actOnList(fields, l -> l.add(value), writeBack);
    return this;
}
/**
 * Tells whether a single-study variant is a reference variant, i.e. every sample's GT
 * value is accepted by {@code isHomRef}.
 *
 * <p>If the study has no "GT" format position (missing or negative) the variant is not
 * a reference variant. A study without any samples data yields {@code true}.
 *
 * @param variant Variant to test; must contain exactly one study
 * @return True if the variant is a reference variant
 * @throws IllegalArgumentException if the variant does not have exactly one study
 */
protected static boolean isRefVariant(Variant variant) {
    int numStudies = variant.getStudies().size();
    if (numStudies != 1) {
        throw new IllegalArgumentException("Required one Study per variant. Found "
                + variant.getStudies().size() + " studies instead");
    }

    StudyEntry studyEntry = variant.getStudies().get(0);
    Integer gtIdx = studyEntry.getFormatPositions().get("GT");
    boolean hasGenotype = gtIdx != null && gtIdx >= 0;
    if (!hasGenotype) {
        return false;
    }

    // Stops at the first sample whose genotype is not hom-ref.
    return studyEntry.getSamplesData().stream().allMatch(data -> isHomRef(data.get(gtIdx)));
}
private boolean samePhase(Variant variant1, Variant variant2) { if (variant1.getStudies() != null && !variant1.getStudies().isEmpty()) { if (variant2.getStudies() != null && !variant2.getStudies().isEmpty()) { int psIdx1 = variant1.getStudies().get(0).getFormat().indexOf("PS"); if (psIdx1 != -1) { int psIdx2 = variant2.getStudies().get(0).getFormat().indexOf("PS"); if (psIdx2 != -1 && // variant2 does have PS set // same phase set value in both variants variant2.getStudies().get(0).getSamplesData().get(0).get(psIdx2) .equals(variant1.getStudies().get(0).getSamplesData().get(0).get(psIdx1)) // Same genotype call in both variants (e.g. 1|0=1|0). // WARNING: assuming variant1 and variant2 do have Files. && variant1.getStudies().get(0).getFiles().get(0).getCall() .equals(variant2.getStudies().get(0).getFiles().get(0).getCall())) { return true; } } } } return false; }
private boolean samePhase(Variant variant1, Variant variant2) { if (variant1.getStudies() != null && !variant1.getStudies().isEmpty()) { if (variant2.getStudies() != null && !variant2.getStudies().isEmpty()) { int psIdx1 = variant1.getStudies().get(0).getFormat().indexOf("PS"); if (psIdx1 != -1) { int psIdx2 = variant2.getStudies().get(0).getFormat().indexOf("PS"); if (psIdx2 != -1 && // variant2 does have PS set // same phase set value in both variants variant2.getStudies().get(0).getSamplesData().get(0).get(psIdx2) .equals(variant1.getStudies().get(0).getSamplesData().get(0).get(psIdx1)) // Same genotype call in both variants (e.g. 1|0=1|0). // WARNING: assuming variant1 and variant2 do have Files. && variant1.getStudies().get(0).getFiles().get(0).getCall() .equals(variant2.getStudies().get(0).getFiles().get(0).getCall())) { return true; } } } } return false; }
/**
 * Normalises every variant of the batch and converts it with {@code variantConverter}.
 *
 * <p>For each study the study id is replaced by its entry in {@code studiesIdRemap}
 * (kept when absent); each file id becomes "-1" when empty or its entry in
 * {@code fileIdRemap} when one exists; a {@code null} samples data list is replaced by
 * an empty list. The progress logger is incremented by the batch size before conversion.
 *
 * @param batch variants to convert
 * @return the converted documents, in batch order
 */
@Override
public List<Document> apply(List<Variant> batch) {
    progressLogger.increment(batch.size(), () -> "up to position " + batch.get(batch.size() - 1));

    return batch.stream()
            .map(variant -> {
                for (StudyEntry studyEntry : variant.getStudies()) {
                    studyEntry.setStudyId(studiesIdRemap.getOrDefault(studyEntry.getStudyId(), studyEntry.getStudyId()));

                    studyEntry.getFiles().forEach(file -> {
                        if (file.getFileId().isEmpty()) {
                            file.setFileId("-1");
                            return;
                        }
                        if (fileIdRemap.containsKey(file.getFileId())) {
                            file.setFileId(fileIdRemap.get(file.getFileId()));
                        }
                    });

                    if (studyEntry.getSamplesData() == null) {
                        studyEntry.setSamplesData(Collections.emptyList());
                    }
                }
                // Normalisation and conversion happen per element, in that order.
                return variantConverter.convertToStorageType(variant);
            })
            .collect(Collectors.toList());
}
}
/**
 * Every variant in {@code allVariants} must carry studies, stats, files, samples data
 * and an annotation.
 */
@Test
public void testIncludeAll() {
    for (Variant variant : allVariants.getResult()) {
        assertThat(variant.getStudies(), not(is(Collections.emptyList())));
        // Comparing the stats container to an empty List passes trivially when the
        // container is not a List; check emptiness directly so the assertion is meaningful.
        assertThat(variant.getStudies().get(0).getStats().isEmpty(), is(false));
        assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList())));
        assertThat(variant.getStudies().get(0).getSamplesData(), not(is(Collections.emptyList())));
        assertNotNull(variant.getAnnotation());
    }
}
/**
 * Iterates over every variant of the adaptor and asserts on the first value of each
 * sample's data (treated as the genotype by the assertion messages).
 *
 * <p>A variant counts as "new" when the study's {@code MISSING_GENOTYPES_UPDATED}
 * attribute is false and all of its files map to ids contained in {@code newFiles}.
 * For samples listed in {@code processedSamples} on non-new variants the value must not
 * be "?/?"; in every other case it must not be "0/0".
 *
 * @param dbAdaptor        adaptor to read variants and the study configuration from
 * @param newFiles         ids of the files considered new
 * @param processedSamples names of the samples that were processed
 */
protected void checkFillMissing(VariantHadoopDBAdaptor dbAdaptor, List<Integer> newFiles, String... processedSamples) {
    Set<Integer> newFileIds = new HashSet<>(newFiles);
    Set<String> processed = new HashSet<>(Arrays.asList(processedSamples));
    StudyConfiguration studyConfiguration = dbAdaptor.getStudyConfigurationManager().getStudyConfiguration(STUDY_ID, null).first();
    boolean missingGenotypesUpdated = studyConfiguration.getAttributes().getBoolean(MISSING_GENOTYPES_UPDATED);

    for (Variant variant : dbAdaptor) {
        StudyEntry studyEntry = variant.getStudies().get(0);
        boolean newVariant = !missingGenotypesUpdated
                && studyEntry.getFiles().stream()
                        .map(FileEntry::getFileId)
                        .map(studyConfiguration.getFileIds()::get)
                        .allMatch(newFileIds::contains);
        String label = newVariant ? "new variant " : "";

        List<List<String>> samplesData = studyEntry.getSamplesData();
        int idx = 0;
        while (idx < samplesData.size()) {
            List<String> data = samplesData.get(idx);
            String sampleName = studyEntry.getOrderedSamplesName().get(idx);

            // Processed samples of already-known variants must have been filled;
            // everything else must not have been turned into hom-ref.
            boolean mustBeFilled = !newVariant && processed.contains(sampleName);
            String forbiddenGt = mustBeFilled ? "?/?" : "0/0";
            assertFalse(label + variant + " _ " + sampleName + " should not have GT=" + forbiddenGt,
                    data.get(0).equals(forbiddenGt));
            idx++;
        }
    }
}
/**
 * Excluding samples through any of the accepted field names must return every variant
 * with an empty samples data list in its first study.
 */
@Test
public void testExcludeSamples() {
    for (String exclude : new String[]{"studies.samplesData", "samplesData", "samples"}) {
        QueryOptions options = new QueryOptions(QueryOptions.EXCLUDE, exclude);
        queryResult = query(new Query(), options);

        int expectedVariants = allVariants.getResult().size();
        int returnedVariants = queryResult.getResult().size();
        assertEquals(expectedVariants, returnedVariants);

        for (Variant variant : queryResult.getResult()) {
            assertThat(variant.getStudies().get(0).getSamplesData(), is(Collections.emptyList()));
        }
    }
}
/**
 * Builds the protobuf representation of a study entry: study id, format, one
 * {@code SamplesDataInfoEntry} per sample, the stats map and the files.
 *
 * <p>Study id and format are copied through the {@code set(getter, setter)} helper
 * defined elsewhere (presumably it skips null values — confirm against the helper).
 * The study id is no longer also set directly on the builder, which duplicated the
 * helper call. A {@code null} samples data list is treated as "no samples".
 *
 * @param study study entry to convert
 * @return builder populated from {@code study}
 */
private VariantProto.StudyEntry.Builder toProto(StudyEntry study) {
    VariantProto.StudyEntry.Builder studyBuilder = VariantProto.StudyEntry.newBuilder();
    set(study::getStudyId, studyBuilder::setStudyId);
    set(study::getFormat, studyBuilder::addAllFormat);

    List<List<String>> samplesData = study.getSamplesData();
    if (samplesData != null) {
        for (List<String> sampleData : samplesData) {
            studyBuilder.addSamplesData(VariantProto.StudyEntry.SamplesDataInfoEntry.newBuilder().addAllInfo(sampleData));
        }
    }

    for (Map.Entry<String, VariantStats> entry : study.getStats().entrySet()) {
        VariantStats stats = entry.getValue();
        VariantProto.VariantStats.Builder variantStats = toProto(stats);
        studyBuilder.putStats(entry.getKey(), variantStats.build());
    }

    for (FileEntry fileEntry : study.getFiles()) {
        VariantProto.FileEntry.Builder fileBuilder = toProto(fileEntry);
        studyBuilder.addFiles(fileBuilder);
    }
    return studyBuilder;
}
/**
 * A summary query (limit 1000) must return as many variants as {@code allVariants},
 * each with empty samples data and empty files in its first study.
 */
@Test
public void testSummary() {
    QueryOptions summaryOptions = new QueryOptions(VariantField.SUMMARY, true).append(QueryOptions.LIMIT, 1000);
    queryResult = query(new Query(), summaryOptions);
    System.out.println("queryResult = " + ((VariantQueryResult) queryResult).getSource());

    int expectedVariants = allVariants.getResult().size();
    assertEquals(expectedVariants, queryResult.getResult().size());

    for (Variant variant : queryResult.getResult()) {
        assertThat(variant.getStudies().get(0).getSamplesData(), is(Collections.emptyList()));
        assertThat(variant.getStudies().get(0).getFiles(), is(Collections.emptyList()));
    }
}
/**
 * Querying with {@code INCLUDE_SAMPLE} set to {@code VariantQueryUtils.NONE} must return
 * every variant with an empty samples data list in its first study.
 */
@Test
public void testReturnNoneSamples() {
    Query noSamplesQuery = new Query(INCLUDE_SAMPLE.key(), VariantQueryUtils.NONE);
    queryResult = query(noSamplesQuery, new QueryOptions());

    int expectedVariants = allVariants.getResult().size();
    assertEquals(expectedVariants, queryResult.getResult().size());

    for (Variant variant : queryResult.getResult()) {
        assertThat(variant.getStudies().get(0).getSamplesData(), is(Collections.emptyList()));
    }
}
/**
 * Two indel variants at the same position that reference each other as secondary
 * alternates must resolve to a single variant with one secondary alternate and the
 * genotype "1/2".
 */
@Test
public void resolveSameVariantWithSecAltInsertion() throws Exception {
    Variant a = getVariant("2:10048155:-:AT", "PASS", "220", "1/2");
    Variant b = getVariant("2:10048155:ATATATATATAT:-", "PASS", "220", "2/1");

    // Cross-link: each variant lists the other one as its secondary alternate.
    List<AlternateCoordinate> secAltsOfA = a.getStudies().get(0).getSecondaryAlternates();
    secAltsOfA.add(new AlternateCoordinate("2", b.getStart(), b.getEnd(), b.getReference(), b.getAlternate(), INDEL));
    List<AlternateCoordinate> secAltsOfB = b.getStudies().get(0).getSecondaryAlternates();
    secAltsOfB.add(new AlternateCoordinate("2", a.getStart(), a.getEnd(), a.getReference(), a.getAlternate(), INDEL));

    VariantLocalConflictResolver resolver = new VariantLocalConflictResolver();
    List<Variant> resolved = new ArrayList<>(resolver.resolveConflicts(Arrays.asList(a, b)));

    System.out.println("a.toString() = " + a.toString());
    System.out.println("b.getStudies().get(0).getSecondaryAlternates().get(0).toString() = "
            + b.getStudies().get(0).getSecondaryAlternates().get(0).toString());

    assertEquals(1, resolved.size());
    Variant merged = resolved.get(0);
    assertEquals(1, merged.getStudies().get(0).getSecondaryAlternates().size());
    assertEquals("1/2", merged.getStudies().get(0).getSamplesData().get(0).get(0));
}
/**
 * Exports everything to Avro, clears the database, imports the file back and checks
 * that every variant's first study holds samples data for 4 samples.
 */
@Test
public void testSimpleImport() throws Exception {
    URI outputFile = newOutputUri().resolve("export.avro");
    System.out.println("outputFile = " + outputFile);

    Query query = new Query();
    QueryOptions queryOptions = new QueryOptions();
    variantStorageEngine.exportData(outputFile, VariantOutputFormat.AVRO, query, queryOptions);

    clearDB(DB_NAME);
    variantStorageEngine.importData(outputFile, new ObjectMap());

    for (Variant variant : variantStorageEngine.getDBAdaptor()) {
        int importedSamples = variant.getStudies().get(0).getSamplesData().size();
        assertEquals(4, importedSamples);
    }
}
@Test public void testConvertToDataModelTypeWithoutStats() { studyEntry.getSamplesData().clear(); // TODO Samples can't be tested easily, needs a running Mongo instance List<String> sampleNames = null; // Test with no stats converter provided DocumentToStudyVariantEntryConverter converter = new DocumentToStudyVariantEntryConverter(true, studyId, fileId, new DocumentToSamplesConverter(studyId, sampleNames, "0/0")); StudyEntry converted = converter.convertToDataModelType(mongoStudy); assertEquals(studyEntry, converted); }
@Test public void testConvertToDataModelTypeWithoutStatsWithStatsConverter() { studyEntry.getSamplesData().clear(); // TODO Samples can't be tested easily, needs a running Mongo instance List<String> sampleNames = null; // Test with a stats converter provided but no stats object DocumentToStudyVariantEntryConverter converter = new DocumentToStudyVariantEntryConverter(true, studyId, fileId, new DocumentToSamplesConverter(studyId, sampleNames, "0/0")); StudyEntry converted = converter.convertToDataModelType(mongoStudy); assertEquals(studyEntry, converted); }
/**
 * Including only "studies" must return populated studies (stats, files, samples data)
 * and no annotation; including only "annotation" must return the annotation and no
 * studies. Both queries must return as many variants as {@code allVariants}.
 */
@Test
public void testInclude() {
    queryResult = query(new Query(), new QueryOptions(QueryOptions.INCLUDE, "studies"));
    assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
    for (Variant variant : queryResult.getResult()) {
        assertThat(variant.getStudies(), not(is(Collections.emptyList())));
        // Comparing the stats container to an empty List passes trivially when the
        // container is not a List; check emptiness directly so the assertion is meaningful.
        assertThat(variant.getStudies().get(0).getStats().isEmpty(), is(false));
        assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList())));
        assertThat(variant.getStudies().get(0).getSamplesData(), not(is(Collections.emptyList())));
        assertNull(variant.getAnnotation());
    }

    queryResult = query(new Query(), new QueryOptions(QueryOptions.INCLUDE, "annotation"));
    assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
    for (Variant variant : queryResult.getResult()) {
        assertThat(variant.getStudies(), is(Collections.emptyList()));
        assertNotNull(variant.getAnnotation());
    }
}
/**
 * Exports to Avro with {@code INCLUDE_SAMPLE} set to ".", re-imports into a cleared
 * database and checks that no variant has samples data in its first study.
 */
@Test
public void testImportEmptySamples() throws Exception {
    URI outputFile = newOutputUri().resolve("export.avro");
    System.out.println("outputFile = " + outputFile);

    Query query = new Query(VariantQueryParam.INCLUDE_SAMPLE.key(), ".");
    QueryOptions queryOptions = new QueryOptions();
    variantStorageEngine.exportData(outputFile, VariantOutputFormat.AVRO, query, queryOptions);

    clearDB(DB_NAME);
    variantStorageEngine.importData(outputFile, new ObjectMap());

    for (Variant variant : variantStorageEngine.getDBAdaptor()) {
        int importedSamples = variant.getStudies().get(0).getSamplesData().size();
        assertEquals(0, importedSamples);
    }
}
/**
 * Exports to Avro excluding the samples data field, re-imports into a cleared database
 * and checks that no variant has samples data in its first study.
 */
@Test
public void testImportExcludeSamples() throws Exception {
    URI outputFile = newOutputUri().resolve("export.avro");
    System.out.println("outputFile = " + outputFile);

    Query query = new Query();
    QueryOptions queryOptions = new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES_SAMPLES_DATA.toString());
    variantStorageEngine.exportData(outputFile, VariantOutputFormat.AVRO, query, queryOptions);

    clearDB(DB_NAME);
    variantStorageEngine.importData(outputFile, new ObjectMap());

    for (Variant variant : variantStorageEngine.getDBAdaptor()) {
        int importedSamples = variant.getStudies().get(0).getSamplesData().size();
        assertEquals(0, importedSamples);
    }
}
/**
 * Exports to Avro including only the second and third configured samples, re-imports
 * into a cleared database and checks that each variant's first study holds exactly
 * those two samples.
 */
@Test
public void testImportSomeSamples() throws Exception {
    URI outputFile = newOutputUri().resolve("export.avro");
    System.out.println("outputFile = " + outputFile);

    // Positions 1 and 2 (end index exclusive) of the configured sample names.
    List<String> configuredSamples = new LinkedList<>(studyConfiguration.getSampleIds().keySet());
    List<String> samples = configuredSamples.subList(1, 3);
    Set<String> samplesSet = new HashSet<>(samples);

    Query query = new Query(VariantQueryParam.INCLUDE_SAMPLE.key(), samples);
    variantStorageEngine.exportData(outputFile, VariantOutputFormat.AVRO, query, new QueryOptions());

    clearDB(DB_NAME);
    variantStorageEngine.importData(outputFile, new ObjectMap());

    for (Variant variant : variantStorageEngine.getDBAdaptor()) {
        int importedSamples = variant.getStudies().get(0).getSamplesData().size();
        assertEquals(2, importedSamples);
        assertEquals(samplesSet, variant.getStudies().get(0).getSamplesName());
    }
}