/**
 * Returns the sample names of the given study entry.
 *
 * NOTE(review): the method name suggests this is the value-extraction hook of a
 * feature-style matcher; the enclosing anonymous class (closed by the trailing
 * "};") is not visible from here - confirm against its declaration.
 *
 * @param actual study entry to read the sample names from
 * @return the value of {@code actual.getSamplesName()}
 */
@Override protected Set<String> featureValueOf(StudyEntry actual) { return actual.getSamplesName(); } };
/**
 * Convenience overload that computes the statistics of a variant using every
 * sample name reported by the study entry.
 *
 * Forwards to the three-argument {@code calculate} overload, passing
 * {@code study.getSamplesName()} as the sample selection.
 *
 * @param variant variant to compute the statistics for
 * @param study   study entry; supplies the sample names
 * @return the result of the three-argument overload
 */
public static VariantStats calculate(Variant variant, StudyEntry study) { return calculate(variant, study, study.getSamplesName()); }
/**
 * Tells whether all conflicting variants have the given type and share one genotype.
 *
 * <p>The genotype used as reference is read from the first study of the first
 * variant in {@code conflicts}, for the first sample name of that study. Every
 * variant is then compared on the genotype of that same sample in its own first study.
 *
 * @param conflicts variants to compare; must not be empty
 * @param type      variant type all of them are expected to have
 * @return {@code true} when every variant has {@code type} and the same genotype
 *         as the reference one, {@code false} otherwise
 * @throws java.util.NoSuchElementException if {@code conflicts} is empty or the
 *         reference study has no sample names
 */
private boolean allSameTypeAndGT(Collection<Variant> conflicts, VariantType type) {
    // A single variant of another type settles the answer before any genotype is read.
    if (conflicts.stream().anyMatch(v -> !v.getType().equals(type))) {
        return false;
    }

    // Reference genotype. iterator().next() fails with NoSuchElementException on an
    // empty collection, which makes the precondition explicit instead of hiding it
    // behind an unchecked Optional.get().
    StudyEntry studyEntry = conflicts.iterator().next().getStudies().get(0);
    String sample = studyEntry.getSamplesName().iterator().next();
    String gt = studyEntry.getSampleData(sample, GENOTYPE_KEY);

    // Types were already verified above, so only the genotype has to be compared.
    // allMatch replaces the count()-then-narrowing-cast comparison against size().
    return conflicts.stream()
            .allMatch(v -> StringUtils.equals(gt, v.getStudies().get(0).getSampleData(sample, GENOTYPE_KEY)));
}
/**
 * Adds the sample names of the variant's only study to {@code samples}.
 *
 * <p>The given set is modified in place and is also the returned instance.
 *
 * @param variant variant expected to hold exactly one study
 * @param samples set receiving the sample names
 * @return {@code samples}, after the names have been added
 * @throws VariantQueryException if the variant does not hold exactly one study,
 *         or if {@code samples} is still empty after adding the names
 */
private Set<String> getSamplesSet(Variant variant, Set<String> samples) {
    int numStudies = variant.getStudies().size();
    if (numStudies != 1) {
        throw new VariantQueryException("Unable to process with " + numStudies + " studies.");
    }

    Set<String> studySamples = variant.getStudies().get(0).getSamplesName();
    samples.addAll(studySamples);

    if (!samples.isEmpty()) {
        return samples;
    }
    throw new VariantQueryException("Unable to get samples!");
}
private boolean checkForDuplicates(Variant current, Variant other, StudyEntry currentStudy, StudyEntry otherStudy, List<AlternateCoordinate> otherAlts) { Set<String> duplicateSamples = otherStudy.getSamplesName().stream() .filter(s -> currentStudy.getSamplesName().contains(s)) .collect(Collectors.toSet()); if (!duplicateSamples.isEmpty()) {
private String variantToString(Variant v) { StringBuilder sb = new StringBuilder(v.getChromosome()); sb.append(":").append(v.getStart()).append("-").append(v.getEnd()); sb.append(v.getReference().isEmpty() ? "-" : v.getReference()); sb.append(":").append(v.getAlternate().isEmpty() ? "-" : v.getAlternate()).append("["); StudyEntry se = getStudy(v); List<List<String>> sd = se.getSamplesData(); for(String sn : se.getSamplesName()){ Integer pos = se.getSamplesPosition().get(sn); if (pos >= sd.size()) { sb.append(sn).append(":S;"); } else if (null == sd.get(pos) || sd.get(pos).size() < 1) { sb.append(sn).append(":G;"); } else { String gt = sd.get(pos).get(0); // GT sb.append(sn).append(":").append(gt).append(";"); } } sb.append("]"); return sb.toString(); }
case "DP": int dp = 0; for (String sampleName : file.getSamplesName()) { String sampleDp = file.getSampleData(sampleName, "DP"); if (StringUtils.isNumeric(sampleDp)) { int mq = 0; int mq0 = 0; for (String sampleName : file.getSamplesName()) { if (StringUtils.isNumeric(file.getSampleData(sampleName, "GQ"))) { int gq = Integer.parseInt(file.getSampleData(sampleName, "GQ"));
for (String sample : newSe.getSamplesName()) { List<String> unknownGenotypeData = samplesWithUnknownGenotype.get(sample); if (unknownGenotypeData != null) {
/**
 * Asserts that, for every sample of a file, the variant holds the expected value
 * in the given sample data field.
 *
 * <p>First asserts that {@code fileId} is one of the file ids of the study
 * configuration. Then, for each sample id registered for that file, asserts that
 * the study entry lists the sample name and that the field value equals the one
 * produced by {@code valueProvider}.
 *
 * @param variant            variant under inspection
 * @param studyConfiguration configuration supplying the study name, file ids and sample ids
 * @param fileId             id of the file whose samples are checked
 * @param valueProvider      maps a sample id to the expected field value
 * @param field              name of the sample data field to compare
 */
public void checkSampleData(Variant variant, StudyConfiguration studyConfiguration, Integer fileId,
                            Function<Integer, String> valueProvider, String field) {
    assertTrue(studyConfiguration.getFileIds().values().contains(fileId));

    studyConfiguration.getSamplesInFiles().get(fileId).forEach(id -> {
        String name = studyConfiguration.getSampleIds().inverse().get(id);
        StudyEntry entry = variant.getStudy(studyConfiguration.getStudyName());
        assertTrue(entry.getSamplesName().contains(name));

        // Built in the same order the arguments were originally evaluated.
        String message = "Variant=" + variant + " StudyId=" + studyConfiguration.getStudyId()
                + " FileId=" + fileId + " Field=" + field
                + " Sample=" + name + " (" + id + ")\n" + variant.toJson();
        String expected = valueProvider.apply(id);
        String actual = entry.getSampleData(name, field);
        assertEquals(message, expected, actual);
    });
}
/**
 * Queries all variants including only a subset of samples (at most five taken
 * from the first study configuration) and checks the samples returned per study:
 * the selected subset for the first study, and no samples for any other study.
 */
@Test
public void testGetAllVariants_returnedSamples() {
    Set<String> sampleSet = new HashSet<>();
    Iterator<String> names = studyConfiguration1.getSampleIds().keySet().iterator();
    for (int taken = 0; taken < 5 && names.hasNext(); taken++) {
        sampleSet.add(names.next());
    }

    query.append(INCLUDE_SAMPLE.key(), new ArrayList<>(sampleSet));
    queryResult = dbAdaptor.get(query, options);
    assertEquals(numVariants, queryResult.getNumResults());
    assertEquals(numVariants, queryResult.getNumTotalResults());

    for (Variant variant : queryResult.getResult()) {
        for (StudyEntry sourceEntry : variant.getStudies()) {
            boolean fromFirstStudy = sourceEntry.getStudyId().equals(studyConfiguration1.getStudyName());
            String message = "StudyId:" + sourceEntry.getStudyId() + ", SampleNames " + sourceEntry.getSamplesName();
            Set<String> expected = fromFirstStudy ? sampleSet : Collections.<String>emptySet();
            assertEquals(message, expected, sourceEntry.getSamplesName());
        }
    }
}
/**
 * Exports the variants restricted to two samples (positions 1 and 2 of the
 * study's sample names) to an Avro file, clears the database, imports the file
 * back, and checks that every variant carries exactly those two samples.
 */
@Test
public void testImportSomeSamples() throws Exception {
    URI outputFile = newOutputUri().resolve("export.avro");
    System.out.println("outputFile = " + outputFile);

    List<String> allSamples = new LinkedList<>(studyConfiguration.getSampleIds().keySet());
    List<String> samples = allSamples.subList(1, 3);
    Query query = new Query(VariantQueryParam.INCLUDE_SAMPLE.key(), samples);
    Set<String> samplesSet = new HashSet<>(samples);

    // Round trip: export, wipe, import.
    variantStorageEngine.exportData(outputFile, VariantOutputFormat.AVRO, query, new QueryOptions());
    clearDB(DB_NAME);
    variantStorageEngine.importData(outputFile, new ObjectMap());

    for (Variant imported : variantStorageEngine.getDBAdaptor()) {
        assertEquals(2, imported.getStudies().get(0).getSamplesData().size());
        assertEquals(samplesSet, imported.getStudies().get(0).getSamplesName());
    }
}
@Test public void testIterator() { int numVariants = 0; Query query = new Query(); for (VariantDBIterator iterator = iterator(query, new QueryOptions()); iterator.hasNext(); ) { Variant variant = iterator.next(); numVariants++; StudyEntry entry = variant.getStudiesMap().entrySet().iterator().next().getValue(); // assertEquals("6", entry.getFileId()); assertEquals(studyConfiguration.getStudyName(), entry.getStudyId()); assertEquals(studyConfiguration.getSampleIds().keySet(), entry.getSamplesName()); } assertEquals(NUM_VARIANTS, numVariants); }
StudyEntry originalStudyEntry = originalVariant.getStudy(STUDY_NAME); StudyEntry exportedStudyEntry = exportedVariant.getStudy(STUDY_NAME); for (String sampleName : originalStudyEntry.getSamplesName()) { assertWithConflicts(exportedVariant, () -> assertEquals("For sample '" + sampleName + "', id " + studyConfiguration.getSampleIds().get(sampleName)
/**
 * Loads the multi-sample file "s1_s2.genome.vcf" with the
 * VARIANT_TABLE_LOAD_REFERENCE option enabled and checks the stored genotypes:
 * no sample may have the unknown genotype, every HOM_REF call must carry a
 * numeric DP value, and at least one HOM_REF call must exist.
 */
@Test
public void testMultiSampleFile() throws Exception {
    StudyConfiguration studyConfiguration = VariantStorageBaseTest.newStudyConfiguration();
    VariantHadoopDBAdaptor dbAdaptor = getVariantStorageEngine().getDBAdaptor();

    ObjectMap loadOptions = new ObjectMap(HadoopVariantStorageEngine.VARIANT_TABLE_LOAD_REFERENCE, true);
    loadFile("s1_s2.genome.vcf", studyConfiguration, loadOptions);
    checkArchiveTableTimeStamp(dbAdaptor);
    printVariants(studyConfiguration, dbAdaptor, newOutputUri());

    int homRefCalls = 0;
    for (Variant variant : dbAdaptor) {
        StudyEntry study = variant.getStudies().get(0);
        for (String sample : study.getSamplesName()) {
            String genotype = study.getSampleData(sample, "GT");
            assertNotEquals(GenotypeClass.UNKNOWN_GENOTYPE, genotype);

            if (!GenotypeClass.HOM_REF.test(genotype)) {
                continue;
            }
            homRefCalls++;
            assertTrue(StringUtils.isNumeric(study.getSampleData(sample, "DP")));
        }
    }
    assertNotEquals(0, homRefCalls);
}
iterator.forEachRemaining(variant -> { assertEquals(1, variant.getStudy(STUDY_NAME).getSamplesData().size()); assertEquals(Collections.singleton("SAMPLE_1"), variant.getStudy(STUDY_NAME).getSamplesName()); assertTrue(variant.getStudy(STUDY_NAME).getFiles().size() > 0); assertTrue(variant.getStudy(STUDY_NAME).getFiles().size() <= 2); iterator.forEachRemaining(variant -> { assertEquals(1, variant.getStudy(STUDY_NAME).getSamplesData().size()); assertEquals(Collections.singleton("SAMPLE_2"), variant.getStudy(STUDY_NAME).getSamplesName()); assertTrue(variant.getStudy(STUDY_NAME).getFiles().size() > 0); assertTrue(variant.getStudy(STUDY_NAME).getFiles().size() <= 2); System.out.println("variant.toJson() = " + variant.toJson()); assertEquals(1, variant.getStudy(STUDY_NAME).getSamplesData().size()); assertEquals(Collections.singleton("SAMPLE_2"), variant.getStudy(STUDY_NAME).getSamplesName()); if (!variant.getStudy(STUDY_NAME).getFiles().isEmpty()) { assertEquals("variant-test-somatic_2.vcf", variant.getStudy(STUDY_NAME).getFiles().get(0).getFileId());