/**
 * Replaces the studies held by this object.
 *
 * <p>Passing {@code null} sets the study lookup map to {@code null} and installs an empty list on the
 * underlying model. Otherwise the lookup map (keyed by study id) and the underlying list are both
 * rebuilt from the given entries, in the order they are supplied.
 *
 * @param studies study entries to install, or {@code null} to clear them
 */
public void setStudies(List<StudyEntry> studies) {
    if (studies == null) {
        studyEntries = null;
        impl.setStudies(new ArrayList<>());
        return;
    }

    studyEntries = new HashMap<>(studies.size());
    impl.setStudies(new ArrayList<>(studies.size()));
    for (StudyEntry entry : studies) {
        // Keep the underlying model list and the lookup map in step.
        impl.getStudies().add(entry.getImpl());
        studyEntries.put(entry.getStudyId(), entry);
    }
}
/**
 * Converts the cohort stats of every study entry in the given map.
 *
 * <p>Each entry's study id is parsed as an integer and passed, together with the entry's stats, to
 * the per-study overload; the resulting lists are concatenated in map iteration order.
 *
 * @param sourceEntries study entries to convert; for instance, you can pass in variant.getSourceEntries()
 * @return list of VariantStats (as Documents)
 */
public List<Document> convertCohortsToStorageType(Map<String, StudyEntry> sourceEntries) {
    List<Document> cohortsStatsList = new LinkedList<>();
    // The map keys are not needed, only the entries themselves.
    for (StudyEntry sourceEntry : sourceEntries.values()) {
        cohortsStatsList.addAll(
                convertCohortsToStorageType(sourceEntry.getStats(), Integer.parseInt(sourceEntry.getStudyId())));
    }
    return cohortsStatsList;
}
/**
 * Builds the sample columns for one study entry of a variant.
 *
 * <p>For every sample name of the study, the values of the requested format fields are joined with
 * {@code ":"}; the per-sample blocks are joined with a tab. A field that is missing from the sample's
 * data map is rendered as the text {@code null}.
 *
 * @param elem   variant providing the sample names of the study
 * @param file   study entry providing the per-sample data
 * @param format ordered format field names to emit for each sample
 * @return the tab-separated sample blocks, or an empty string when there are no samples
 */
private String generateSampleInfo(Variant elem, StudyEntry file, List<String> format) {
    StringBuilder columns = new StringBuilder();
    Iterator<String> samples = elem.getSampleNames(file.getStudyId()).iterator();
    while (samples.hasNext()) {
        Map<String, String> values = file.getSampleDataAsMap(samples.next());

        // Prefix every field but the first with the field separator.
        String separator = "";
        for (String field : format) {
            columns.append(separator).append(values.get(field));
            separator = ":";
        }

        if (samples.hasNext()) {
            columns.append("\t");
        }
    }
    return columns.toString();
}
/**
 * Adds a study entry, replacing any entry previously registered under the same study id.
 *
 * <p>The lookup map and the underlying model list are created on demand. When an entry with the same
 * study id already existed, its underlying object is removed from the model list before the new one
 * is appended.
 *
 * @param studyEntry entry to add
 */
public void addStudyEntry(StudyEntry studyEntry) {
    if (studyEntries == null) {
        studyEntries = new HashMap<>();
    }
    if (impl.getStudies() == null) {
        impl.setStudies(new ArrayList<>());
    }

    StudyEntry replaced = this.studyEntries.put(studyEntry.getStudyId(), studyEntry);
    if (replaced != null) {
        // Drop the stale underlying object so the model list holds one element per study id.
        impl.getStudies().remove(replaced.getImpl());
    }
    impl.getStudies().add(studyEntry.getImpl());
}
/**
 * Remaps the study and file ids of every variant in the batch, then converts each variant to its
 * storage document.
 *
 * <p>The variants in {@code batch} are modified in place before conversion.
 *
 * @param batch variants to process
 * @return one converted document per variant, in batch order
 */
@Override
public List<Document> apply(List<Variant> batch) {
    progressLogger.increment(batch.size(), () -> "up to position " + batch.get(batch.size() - 1));
    return batch.stream()
            .map(this::remapIds)
            .map(variantConverter::convertToStorageType)
            .collect(Collectors.toList());
}

/**
 * Rewrites ids in place: study ids through {@code studiesIdRemap}, file ids through
 * {@code fileIdRemap} (an empty file id becomes {@code "-1"}), and replaces missing samples data
 * with an empty list.
 */
private Variant remapIds(Variant variant) {
    for (StudyEntry studyEntry : variant.getStudies()) {
        studyEntry.setStudyId(studiesIdRemap.getOrDefault(studyEntry.getStudyId(), studyEntry.getStudyId()));

        for (FileEntry file : studyEntry.getFiles()) {
            if (file.getFileId().isEmpty()) {
                file.setFileId("-1");
            } else if (fileIdRemap.containsKey(file.getFileId())) {
                file.setFileId(fileIdRemap.get(file.getFileId()));
            }
        }

        if (studyEntry.getSamplesData() == null) {
            studyEntry.setSamplesData(Collections.emptyList());
        }
    }
    return variant;
}
}
/**
 * Returns the joined sample fields of one sample, looking up the variant's source entry by the
 * file id and study id of the given study entry and delegating to
 * {@code VcfUtils.getJoinedSampleFields}.
 *
 * @param variant    variant holding the source entries
 * @param file       study entry whose file id and study id select the source entry
 * @param sampleName name of the sample to render
 * @return the value produced by the delegate for that source entry and sample
 */
public static String getJoinedSampleFields(Variant variant, StudyEntry file, String sampleName) {
    String studyId = file.getStudyId();
    return VcfUtils.getJoinedSampleFields(variant.getSourceEntry(file.getFileId(), studyId), sampleName);
}
/**
 * Installs sorted sample positions on the study entries of the given variants.
 *
 * <p>When the per-study map {@code samplesPositions} is available, each study entry receives the
 * positions registered under its study id, if any. When it is {@code null}, the single
 * {@code samplesPosition} value is applied, but only to variants that have exactly one study.
 *
 * @param variants variants to update in place
 * @return the same list that was passed in
 */
protected List<Variant> addSamplesPosition(List<Variant> variants) {
    if (samplesPositions == null) {
        // No per-study map: fall back to the single shared positions, single-study variants only.
        for (Variant variant : variants) {
            if (variant.getStudies().size() == 1) {
                variant.getStudies().get(0).setSortedSamplesPosition(samplesPosition);
            }
        }
        return variants;
    }

    for (Variant variant : variants) {
        for (StudyEntry studyEntry : variant.getStudies()) {
            LinkedHashMap<String, Integer> positionsForStudy = samplesPositions.get(studyEntry.getStudyId());
            if (positionsForStudy != null) {
                studyEntry.setSortedSamplesPosition(positionsForStudy);
            }
        }
    }
    return variants;
}
protected void setOtherFields(Variant variant, StudyEntry study, VariantFileMetadata fileMetadata, List<String> ids, float quality, String filter, String info, String format, String[] alternateAlleles, String line) { // Fields not affected by the structure of REF and ALT fields if (!ids.isEmpty()) { variant.setIds(ids); } if (quality > -1) { study.addAttribute(fileMetadata.getId(), StudyEntry.QUAL, String.valueOf(quality)); } if (!filter.isEmpty()) { study.addAttribute(fileMetadata.getId(), StudyEntry.FILTER, filter); } if (!info.isEmpty()) { parseInfo(variant, fileMetadata.getId(), study.getStudyId(), info); } study.addAttribute(fileMetadata.getId(), StudyEntry.SRC, line); }
@Override public boolean write(Variant variant) { ArrayList<PopulationFrequency> frequencies = new ArrayList<>(); for (StudyEntry studyEntry : variant.getStudies()) { for (Map.Entry<String, VariantStats> cohortEntry : studyEntry.getStats().entrySet()) { String studyId = studyEntry.getStudyId(); studyId = studyId.substring(studyId.lastIndexOf(":") + 1); PopulationFrequency populationFrequency = converter.convert(studyId, cohortEntry.getKey(), cohortEntry.getValue(), variant.getReference(), variant.getAlternate()); // Write only frequencies non zero if (populationFrequency.getAltAlleleFreq() > 0 && !populationFrequency.getAltAlleleFreq().isNaN()) { frequencies.add(populationFrequency); } } } Variant newVar = new Variant(variant.toString()); VariantAnnotation annotation = new VariantAnnotation(); annotation.setPopulationFrequencies(frequencies); newVar.setAnnotation(annotation); try { sequenceWriter.write(newVar); outputStream.write('\n'); } catch (IOException e) { throw new UncheckedIOException(e); } return true; } }
/**
 * Creates an empty Variant (position, ref, alt, type) from a template, with basic study information
 * but without samples.
 *
 * <p>For each study of the template a new study entry with the same study id is added. It holds a
 * single file entry with empty ids and no attributes, a format made of the keys returned by
 * {@code getGtKey()} and {@code getFilterKey()}, and empty sample positions and sample data.
 *
 * @param target Variant to take as a template
 * @return Variant filled with chromosome, start, end, ref, alt, type and one sample-less study entry
 *         per template study
 */
public Variant createFromTemplate(Variant target) {
    Variant skeleton = new Variant(
            target.getChromosome(), target.getStart(), target.getEnd(),
            target.getReference(), target.getAlternate());
    skeleton.setType(target.getType());

    for (StudyEntry templateStudy : target.getStudies()) {
        StudyEntry emptyStudy = new StudyEntry(templateStudy.getStudyId());
        emptyStudy.setFiles(Collections.singletonList(new FileEntry("", "", new HashMap<>())));
        emptyStudy.setFormat(Arrays.asList(getGtKey(), getFilterKey()));
        emptyStudy.setSamplesPosition(new HashMap<>());
        emptyStudy.setSamplesData(new ArrayList<>());
        skeleton.addStudyEntry(emptyStudy);
    }
    return skeleton;
}
/**
 * Converts a study entry into its protobuf builder: study id, format, samples data, per-cohort
 * stats and files.
 *
 * @param study study entry to convert
 * @return a builder populated from the study entry
 */
private VariantProto.StudyEntry.Builder toProto(StudyEntry study) {
    VariantProto.StudyEntry.Builder studyBuilder = VariantProto.StudyEntry.newBuilder();

    // NOTE(review): the study id is assigned twice, once directly and once through set(...).
    // Presumably set(...) only differs by guarding against null - confirm and drop one of the two.
    studyBuilder.setStudyId(study.getStudyId());
    set(study::getStudyId, studyBuilder::setStudyId);
    set(study::getFormat, studyBuilder::addAllFormat);

    study.getSamplesData().forEach(sampleData ->
            studyBuilder.addSamplesData(
                    VariantProto.StudyEntry.SamplesDataInfoEntry.newBuilder().addAllInfo(sampleData)));

    study.getStats().forEach((cohort, stats) ->
            studyBuilder.putStats(cohort, toProto(stats).build()));

    study.getFiles().forEach(fileEntry -> studyBuilder.addFiles(toProto(fileEntry)));

    return studyBuilder;
}
/**
 * Removes the study without purging and checks that no remaining study entry carries the removed
 * study's id, while the total number of variants is unchanged.
 */
@Test
public void removeStudyTest() throws Exception {
    VariantMongoDBAdaptor mongoAdaptor = (VariantMongoDBAdaptor) dbAdaptor;
    mongoAdaptor.removeStudy(studyConfiguration.getStudyName(), System.currentTimeMillis(),
            new QueryOptions("purge", false));

    // NOTE(review): entries are compared against the numeric study id rendered as text; confirm that
    // StudyEntry#getStudyId holds that id rather than the study name, otherwise this check cannot fail.
    String removedStudyId = String.valueOf(studyConfiguration.getStudyId());
    for (Variant variant : dbAdaptor) {
        for (StudyEntry studyEntry : variant.getStudiesMap().values()) {
            assertFalse(studyEntry.getStudyId().equals(removedStudyId));
        }
    }

    QueryResult<Long> total = dbAdaptor.count(new Query());
    assertEquals(numVariants, total.first().intValue());
}
/**
 * Removes the study with purging enabled and checks that no remaining study entry carries the
 * removed study's id and that a query for all variants reports zero total results.
 */
@Test
public void removeAndPurgeStudyTest() throws Exception {
    VariantMongoDBAdaptor mongoAdaptor = (VariantMongoDBAdaptor) dbAdaptor;
    mongoAdaptor.removeStudy(studyConfiguration.getStudyName(), System.currentTimeMillis(),
            new QueryOptions("purge", true));

    // NOTE(review): entries are compared against the numeric study id rendered as text; confirm that
    // StudyEntry#getStudyId holds that id rather than the study name, otherwise this check cannot fail.
    String removedStudyId = String.valueOf(studyConfiguration.getStudyId());
    for (Variant variant : dbAdaptor) {
        for (StudyEntry studyEntry : variant.getStudiesMap().values()) {
            assertFalse(studyEntry.getStudyId().equals(removedStudyId));
        }
    }

    QueryResult<Variant> remaining = dbAdaptor.get(new Query(), new QueryOptions());
    assertEquals(0, remaining.getNumTotalResults());
}
/**
 * Requests at most five samples of study 1 and checks that every variant is returned, that entries
 * of study 1 expose exactly those samples, and that entries of any other study expose none.
 */
@Test
public void testGetAllVariants_returnedSamples() {
    Set<String> sampleSet = new HashSet<>();
    Iterator<String> sampleNames = studyConfiguration1.getSampleIds().keySet().iterator();
    for (int taken = 0; taken < 5 && sampleNames.hasNext(); taken++) {
        sampleSet.add(sampleNames.next());
    }

    query.append(INCLUDE_SAMPLE.key(), new ArrayList<>(sampleSet));
    queryResult = dbAdaptor.get(query, options);

    assertEquals(numVariants, queryResult.getNumResults());
    assertEquals(numVariants, queryResult.getNumTotalResults());
    for (Variant variant : queryResult.getResult()) {
        for (StudyEntry sourceEntry : variant.getStudies()) {
            String message = "StudyId:" + sourceEntry.getStudyId() + ", SampleNames " + sourceEntry.getSamplesName();
            // Only the study the samples were taken from should return them.
            Set<String> expectedSamples = sourceEntry.getStudyId().equals(studyConfiguration1.getStudyName())
                    ? sampleSet
                    : Collections.<String>emptySet();
            assertEquals(message, expectedSamples, sourceEntry.getSamplesName());
        }
    }
}
@Test public void testIterator() { int numVariants = 0; Query query = new Query(); for (VariantDBIterator iterator = iterator(query, new QueryOptions()); iterator.hasNext(); ) { Variant variant = iterator.next(); numVariants++; StudyEntry entry = variant.getStudiesMap().entrySet().iterator().next().getValue(); // assertEquals("6", entry.getFileId()); assertEquals(studyConfiguration.getStudyName(), entry.getStudyId()); assertEquals(studyConfiguration.getSampleIds().keySet(), entry.getSamplesName()); } assertEquals(NUM_VARIANTS, numVariants); }
/**
 * Filters by study 2 OR study 3 and checks that the number of results matches the number of
 * variants in {@code allVariants} having at least one of those studies, and that each returned
 * variant contains study 2 or study 3.
 */
@Test
public void testGetAllVariants_filterStudies2_OR_3() {
    List<String> studyIds = Arrays.asList(
            studyConfiguration2.getStudyName(),
            studyConfiguration3.getStudyName());
    query.append(STUDY.key(), studyIds);
    queryResult = dbAdaptor.get(query, options);

    // Count each variant once if any of its studies is one of the requested ones.
    int expectedVariants = 0;
    for (Variant variant : allVariants.getResult()) {
        boolean inRequestedStudy = variant.getStudies().stream()
                .map(StudyEntry::getStudyId)
                .anyMatch(studyIds::contains);
        if (inRequestedStudy) {
            expectedVariants++;
        }
    }

    assertTrue(expectedVariants > 0);
    assertEquals(expectedVariants, queryResult.getNumResults());
    assertEquals(expectedVariants, queryResult.getNumTotalResults());

    for (Variant variant : queryResult.getResult()) {
        List<String> returnedStudyIds = variant.getStudies().stream()
                .map(StudyEntry::getStudyId)
                .collect(Collectors.toList());
        assertThat(returnedStudyIds,
                anyOf(hasItem(studyConfiguration2.getStudyName()), hasItem(studyConfiguration3.getStudyName())));
    }
}
/**
 * Requests only studies 2 and 3 to be included and checks that every variant is still returned and
 * that each returned study entry belongs to one of those two studies.
 */
@Test
public void testGetAllVariants_returnedStudies2_3() {
    List<String> studyIds = Arrays.asList(
            studyConfiguration2.getStudyName(),
            studyConfiguration3.getStudyName());
    query.append(INCLUDE_STUDY.key(), studyIds);
    queryResult = dbAdaptor.get(query, options);

    assertEquals(numVariants, queryResult.getNumResults());
    assertEquals(numVariants, queryResult.getNumTotalResults());
    for (Variant variant : queryResult.getResult()) {
        variant.getStudies().stream()
                .map(StudyEntry::getStudyId)
                .forEach(returnedStudyId -> assertThat(studyIds, hasItem(returnedStudyId)));
    }
}
/**
 * Requests only study 1 (by its numeric id) to be included and checks that every variant is still
 * returned and that each returned study entry reports study 1's name as its study id.
 */
@Test
public void testGetAllVariants_returnedStudies1() {
    query.append(INCLUDE_STUDY.key(), studyConfiguration1.getStudyId());
    queryResult = dbAdaptor.get(query, options);

    assertEquals(numVariants, queryResult.getNumResults());
    assertEquals(numVariants, queryResult.getNumTotalResults());

    String expectedStudy = studyConfiguration1.getStudyName();
    for (Variant variant : queryResult.getResult()) {
        for (StudyEntry sourceEntry : variant.getStudies()) {
            assertEquals(expectedStudy, sourceEntry.getStudyId());
        }
    }
}
/**
 * Requests only study 3 to be included, first by its numeric id rendered as text and then by its
 * name, and checks in both cases that every variant is returned and that each returned study entry
 * reports study 3's name as its study id.
 */
@Test
public void testGetAllVariants_returnedStudies3() {
    String[] studySelectors = {
            Integer.toString(studyConfiguration3.getStudyId()),
            studyConfiguration3.getStudyName(),
    };

    // The same expectations must hold whichever way the study is referenced.
    for (String studySelector : studySelectors) {
        query.put(INCLUDE_STUDY.key(), studySelector);
        queryResult = dbAdaptor.get(query, options);

        assertEquals(numVariants, queryResult.getNumResults());
        assertEquals(numVariants, queryResult.getNumTotalResults());
        for (Variant variant : queryResult.getResult()) {
            for (StudyEntry sourceEntry : variant.getStudies()) {
                assertEquals(studyConfiguration3.getStudyName(), sourceEntry.getStudyId());
            }
        }
    }
}