/**
 * Returns the stats map exposed by {@link #getStats()}.
 *
 * @return the value returned by {@code getStats()}
 * @deprecated plain delegate; call {@link #getStats()} directly.
 */
@Deprecated
public Map<String, VariantStats> getCohortStats() {
    return this.getStats();
}
/**
 * Returns the stats that {@link #getStats(String)} yields for the given cohort.
 *
 * @param cohortName name of the cohort to look up
 * @return the value returned by {@code getStats(cohortName)}
 * @deprecated plain delegate; call {@link #getStats(String)} directly.
 */
@Deprecated
public VariantStats getCohortStats(String cohortName) {
    return this.getStats(cohortName);
}
/**
 * Extracts the feature under test: the stats of the cohort identified by
 * {@code cohortName} (taken from the enclosing scope).
 *
 * @param actual study entry being inspected
 * @return the stats returned by {@code actual.getStats(cohortName)}
 */
@Override
protected VariantStats featureValueOf(StudyEntry actual) {
    VariantStats cohortStats = actual.getStats(cohortName);
    return cohortStats;
}
}; // closes the enclosing declaration, which begins outside this excerpt
private void addCohortStatsSingleInfoField(StudyEntry studyEntry, Map<String, Object> attributes) { if (studyEntry.getStats() == null || studyEntry.getStats().size() == 0) { return; } List<String> statsList = new ArrayList<>(); for (Map.Entry<String, VariantStats> entry : studyEntry.getStats().entrySet()) { String cohortName = entry.getKey(); VariantStats stats = entry.getValue(); // if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) { // int an = stats.getAltAlleleCount() + stats.getRefAlleleCount(); // if (an >= 0) { // attributes.put(cohortName + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an)); // } // if (stats.getAltAlleleCount() >= 0) { // attributes.put(cohortName + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount())); // } // } statsList.add(cohortName + ":" + DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq())); } // set cohort stats attributes attributes.put(STATS_INFO_KEY, String.join(FIELD_SEPARATOR, statsList)); }
/**
 * Converts the cohort stats of every given study entry to its storage representation.
 *
 * @param sourceEntries study entries to convert; for instance, {@code variant.getSourceEntries()}
 * @return the documents produced for all entries, concatenated in iteration order
 */
public List<Document> convertCohortsToStorageType(Map<String, StudyEntry> sourceEntries) {
    List<Document> allCohortDocuments = new LinkedList<>();
    for (StudyEntry entry : sourceEntries.values()) {
        // The per-study overload takes the study id as an int.
        int numericStudyId = Integer.parseInt(entry.getStudyId());
        allCohortDocuments.addAll(convertCohortsToStorageType(entry.getStats(), numericStudyId));
    }
    return allCohortDocuments;
}
/**
 * Exports the stats of every cohort as separate attributes.
 *
 * <p>For the default cohort ({@code StudyEntry.DEFAULT_COHORT}) the unprefixed allele number,
 * allele count and allele frequency keys are written. Every other cohort only gets its allele
 * frequency, under a key prefixed with {@code <cohortName>_}. Nothing is written when the study
 * has no stats.
 *
 * @param studyEntry study whose cohort stats are exported
 * @param attributes attribute map that receives the entries
 */
private void addCohortStatsMultiInfoField(StudyEntry studyEntry, Map<String, Object> attributes) {
    Map<String, VariantStats> statsByCohort = studyEntry.getStats();
    if (statsByCohort == null || statsByCohort.isEmpty()) {
        return;
    }
    for (Map.Entry<String, VariantStats> entry : statsByCohort.entrySet()) {
        String cohortName = entry.getKey();
        VariantStats stats = entry.getValue();

        if (cohortName.equals(StudyEntry.DEFAULT_COHORT)) {
            cohortName = "";
            // The allele number is the total of called alleles (alternate + reference),
            // not just the alternate count, which is reported separately as the allele count.
            int an = stats.getAltAlleleCount() + stats.getRefAlleleCount();
            if (an >= 0) {
                attributes.put(cohortName + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(an));
            }
            if (stats.getAltAlleleCount() >= 0) {
                attributes.put(cohortName + VCFConstants.ALLELE_COUNT_KEY, String.valueOf(stats.getAltAlleleCount()));
            }
        } else {
            cohortName = cohortName + "_";
        }
        attributes.put(cohortName + VCFConstants.ALLELE_FREQUENCY_KEY, DECIMAL_FORMAT_7.format(stats.getAltAlleleFreq()));
    }
}
public void updateFileStats(List<Variant> variants) { int incompleteVariantStats = 0; for (Variant v : variants) { StudyEntry studyEntry = v.getStudy(studyId); if (studyEntry == null) { // The variant is not contained in this file continue; } try { VariantStats stats = studyEntry.getStats(StudyEntry.DEFAULT_COHORT); if (stats != null) { fileStats.update(stats); } } catch (NullPointerException e) { e.printStackTrace(); incompleteVariantStats++; } } if (incompleteVariantStats != 0) { Logger logger = LoggerFactory.getLogger(VariantSourceStats.class); logger.warn("{} VariantStats have needed members as null", incompleteVariantStats); } }
/**
 * Copies the allele statistics of each cohort into the attribute map.
 *
 * <p>The default cohort ({@code StudyEntry.DEFAULT_COHORT}) is written without a prefix and
 * also gets its allele number (alternate + reference count) and allele count, each only when
 * non-negative. Other cohorts only get their allele frequency, keyed as {@code <cohortName>_}
 * followed by the frequency key.
 *
 * @param studyEntry study whose cohort stats are exported; ignored if it has no stats map
 * @param attributes attribute map that receives the entries
 */
private void addStats(StudyEntry studyEntry, Map<String, Object> attributes) {
    Map<String, VariantStats> statsByCohort = studyEntry.getStats();
    if (statsByCohort == null) {
        return;
    }
    for (Map.Entry<String, VariantStats> cohort : statsByCohort.entrySet()) {
        VariantStats cohortStats = cohort.getValue();
        boolean defaultCohort = cohort.getKey().equals(StudyEntry.DEFAULT_COHORT);
        String keyPrefix = defaultCohort ? "" : cohort.getKey() + "_";

        if (defaultCohort) {
            int alleleNumber = cohortStats.getAltAlleleCount() + cohortStats.getRefAlleleCount();
            if (alleleNumber >= 0) {
                attributes.put(keyPrefix + VCFConstants.ALLELE_NUMBER_KEY, String.valueOf(alleleNumber));
            }
            if (cohortStats.getAltAlleleCount() >= 0) {
                attributes.put(keyPrefix + VCFConstants.ALLELE_COUNT_KEY,
                        String.valueOf(cohortStats.getAltAlleleCount()));
            }
        }

        attributes.put(keyPrefix + VCFConstants.ALLELE_FREQUENCY_KEY,
                DECIMAL_FORMAT_7.format(cohortStats.getAltAlleleFreq()));
    }
}
@Override public boolean write(Variant variant) { ArrayList<PopulationFrequency> frequencies = new ArrayList<>(); for (StudyEntry studyEntry : variant.getStudies()) { for (Map.Entry<String, VariantStats> cohortEntry : studyEntry.getStats().entrySet()) { String studyId = studyEntry.getStudyId(); studyId = studyId.substring(studyId.lastIndexOf(":") + 1); PopulationFrequency populationFrequency = converter.convert(studyId, cohortEntry.getKey(), cohortEntry.getValue(), variant.getReference(), variant.getAlternate()); // Write only frequencies non zero if (populationFrequency.getAltAlleleFreq() > 0 && !populationFrequency.getAltAlleleFreq().isNaN()) { frequencies.add(populationFrequency); } } } Variant newVar = new Variant(variant.toString()); VariantAnnotation annotation = new VariantAnnotation(); annotation.setPopulationFrequencies(frequencies); newVar.setAnnotation(annotation); try { sequenceWriter.write(newVar); outputStream.write('\n'); } catch (IOException e) { throw new UncheckedIOException(e); } return true; } }
/**
 * Asserts that every study of every variant in the given database holds stats for exactly
 * the expected cohorts.
 *
 * @param cohortNames expected cohort names
 * @param dbName      name of the database to scan
 * @throws Exception if the storage engine or adaptor cannot be obtained
 */
public static void checkCalculatedAggregatedStats(Set<String> cohortNames, String dbName) throws Exception {
    VariantDBAdaptor dbAdaptor = StorageEngineFactory.get()
            .getVariantStorageEngine(null, dbName)
            .getDBAdaptor();
    for (Variant variant : dbAdaptor) {
        for (StudyEntry study : variant.getStudies()) {
            Set<String> storedCohorts = study.getStats().keySet();
            assertEquals(cohortNames, storedCohorts);
            for (String storedCohort : storedCohorts) {
                assertTrue(cohortNames.contains(storedCohort));
            }
        }
    }
}

/**
/**
 * Computes Hardy-Weinberg statistics for the configured cohort and attaches them to the
 * study as a {@code VariantScore}.
 *
 * <p>Phased and unphased genotype counts are added together for each of the three genotype
 * classes. The variant is returned untouched when it has no entry for {@code studyId} or no
 * stats for {@code cohortName}.
 *
 * @param variant variant to score
 * @return the same variant instance
 * @throws Exception declared by the signature; nothing in this body throws a checked exception directly
 */
public Variant apply(Variant variant) throws Exception {
    StudyEntry study = variant.getStudy(studyId);
    VariantStats stats = (study == null) ? null : study.getStats(cohortName);
    if (stats == null) {
        return variant;
    }

    int homRefTotal = stats.getGenotypeCount().getOrDefault(HOM_REF, 0)
            + stats.getGenotypeCount().getOrDefault(HOM_REF_PHASED, 0);
    int hetTotal = stats.getGenotypeCount().getOrDefault(HET, 0)
            + stats.getGenotypeCount().getOrDefault(HET_PHASED, 0);
    int homAltTotal = stats.getGenotypeCount().getOrDefault(HOM_ALT, 0)
            + stats.getGenotypeCount().getOrDefault(HOM_ALT_PHASED, 0);

    VariantHardyWeinbergStats hw = new VariantHardyWeinbergStats(homRefTotal, hetTotal, homAltTotal);
    hw.calculate();

    VariantScore score = new VariantScore(SCORE_ID, cohortName, null, hw.getChi2(), hw.getpValue());
    study.addScore(score);
    return variant;
}
} // closes the enclosing declaration, which begins outside this excerpt
protected void checkAggregatedCohorts(VariantDBAdaptor dbAdaptor, StudyConfiguration studyConfiguration) { for (Variant variant : dbAdaptor) { for (StudyEntry sourceEntry : variant.getStudies()) { Map<String, VariantStats> cohortStats = sourceEntry.getStats(); String calculatedCohorts = cohortStats.keySet().toString(); for (Integer cohortId : studyConfiguration.getCalculatedStats()) { String cohortName = studyConfiguration.getCohortIds().inverse().get(cohortId); assertTrue("CohortStats should contain stats for cohort " + cohortName + ". Only contains stats for " + calculatedCohorts, cohortStats.containsKey(cohortName)); //Check stats are calculated assertValidStats(variant, cohortStats.get(cohortName)); } } } }
/**
 * With no projection applied, every variant must come back with studies, stats, files,
 * samples data and an annotation.
 */
@Test
public void testIncludeAll() {
    for (Variant variant : allVariants.getResult()) {
        assertThat(variant.getStudies(), not(is(Collections.emptyList())));
        // Stats are exposed as a map; a map never equals a list, so the emptiness check
        // has to compare against an empty map to be able to fail.
        assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyMap())));
        assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList())));
        assertThat(variant.getStudies().get(0).getSamplesData(), not(is(Collections.emptyList())));
        assertNotNull(variant.getAnnotation());
    }
}
/**
 * Excluding the stats (through either accepted field name) must return every variant with
 * an empty stats map in its first study.
 */
@Test
public void testExcludeStats() {
    for (String exclude : Arrays.asList("studies.stats", "stats")) {
        queryResult = query(new Query(), new QueryOptions(QueryOptions.EXCLUDE, exclude));
        assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
        for (Variant variant : queryResult.getResult()) {
            // Stats are a map: comparing against an empty list could never fail, and the
            // point of the test is that the excluded stats are actually absent.
            assertThat(variant.getStudies().get(0).getStats(), is(Collections.emptyMap()));
        }
    }
}
/**
 * Builds the protobuf representation of a study entry: study id, format, samples data,
 * per-cohort stats and files.
 *
 * @param study study entry to convert
 * @return a builder populated from {@code study}
 */
private VariantProto.StudyEntry.Builder toProto(StudyEntry study) {
    VariantProto.StudyEntry.Builder builder = VariantProto.StudyEntry.newBuilder();

    // NOTE(review): the study id is assigned directly and then again through set(...);
    // one of the two looks redundant -- confirm what set(...) does before removing either.
    builder.setStudyId(study.getStudyId());
    set(study::getStudyId, builder::setStudyId);
    set(study::getFormat, builder::addAllFormat);

    for (List<String> sampleData : study.getSamplesData()) {
        builder.addSamplesData(
                VariantProto.StudyEntry.SamplesDataInfoEntry.newBuilder().addAllInfo(sampleData));
    }

    study.getStats().forEach((cohortName, stats) -> builder.putStats(cohortName, toProto(stats).build()));

    for (FileEntry fileEntry : study.getFiles()) {
        builder.addFiles(toProto(fileEntry));
    }
    return builder;
}
/**
 * Verifies the stored stats against the expected cohorts.
 *
 * <p>For every study of every variant: the number of cohorts with stats must match, every
 * cohort with stats must be expected, and (when the expected cohort is non-null) the sum of
 * its genotype counts must equal the cohort's number of samples. Finally, every cohort is
 * re-read from the catalog and must be in {@code READY} status.
 *
 * @param studyId        study used to look the cohorts up in the catalog
 * @param cohorts        expected cohorts by name
 * @param catalogManager catalog used to re-read the cohorts
 * @param dbName         name of the variant database to scan
 * @param sessionId      session used for the catalog lookups
 * @throws Exception if the storage engine or the catalog fails
 */
public static void checkCalculatedStats(String studyId, Map<String, Cohort> cohorts, CatalogManager catalogManager,
                                        String dbName, String sessionId) throws Exception {
    VariantDBAdaptor dbAdaptor = StorageEngineFactory.get()
            .getVariantStorageEngine(null, dbName)
            .getDBAdaptor();

    for (Variant variant : dbAdaptor) {
        for (StudyEntry study : variant.getStudies()) {
            Map<String, VariantStats> statsByCohort = study.getStats();
            assertEquals("In variant " + variant.toString(), cohorts.size(), statsByCohort.size());

            for (Map.Entry<String, VariantStats> cohortStats : statsByCohort.entrySet()) {
                String cohortName = cohortStats.getKey();
                assertTrue("In variant " + variant.toString(), cohorts.containsKey(cohortName));

                Cohort expectedCohort = cohorts.get(cohortName);
                if (expectedCohort == null) {
                    continue;
                }
                String failureMessage = "Variant: " + variant.toString()
                        + " does not have the correct number of samples in cohort '" + cohortName
                        + "'. jsonVariant: " + variant.toJson();
                int genotypeTotal = cohortStats.getValue().getGenotypeCount().values().stream()
                        .reduce(Integer::sum)
                        .orElse(0)
                        .intValue();
                assertEquals(failureMessage, expectedCohort.getSamples().size(), genotypeTotal);
            }
        }
    }

    for (Cohort expected : cohorts.values()) {
        Cohort stored = catalogManager.getCohortManager().get(studyId, expected.getId(), null, sessionId).first();
        assertEquals(Cohort.CohortStatus.READY, stored.getStatus().getName());
    }
}
/**
 * Filtering by more than 4 missing alleles in the default cohort must return 9 variants,
 * each of which satisfies the filter.
 */
@Test
public void testGetAllVariants_missingAllele() throws Exception {
    String missingAllelesFilter = STUDY_NAME + ":" + StudyEntry.DEFAULT_COHORT + ">4";
    queryResult = query(new Query(MISSING_ALLELES.key(), missingAllelesFilter), null);

    assertEquals(9, queryResult.getNumResults());
    queryResult.getResult().forEach(variant -> assertTrue(
            variant.getStudiesMap().get(STUDY_NAME).getStats()
                    .get(StudyEntry.DEFAULT_COHORT).getMissingAlleleCount() > 4));
}
/**
 * After removing the stats of one cohort, no study of any variant may still reference it,
 * and the number of variants must be unchanged.
 */
@Test
public void removeStatsTest() throws Exception {
    String deletedCohort = "cohort2";
    VariantMongoDBAdaptor mongoAdaptor = (VariantMongoDBAdaptor) dbAdaptor;
    mongoAdaptor.removeStats(studyConfiguration.getStudyName(), deletedCohort, new QueryOptions());

    for (Variant variant : dbAdaptor) {
        for (StudyEntry study : variant.getStudiesMap().values()) {
            String failureMessage = "The cohort '" + deletedCohort
                    + "' is not completely deleted in variant: '" + variant + "'";
            assertFalse(failureMessage, study.getStats().containsKey(deletedCohort));
        }
    }

    QueryResult<Long> countResult = dbAdaptor.count(new Query());
    assertEquals(numVariants, countResult.first().intValue());
}
/**
 * Checks the INCLUDE projection: including {@code studies} returns populated studies and no
 * annotation; including {@code annotation} returns the annotation and no studies.
 */
@Test
public void testInclude() {
    queryResult = query(new Query(), new QueryOptions(QueryOptions.INCLUDE, "studies"));
    assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
    for (Variant variant : queryResult.getResult()) {
        assertThat(variant.getStudies(), not(is(Collections.emptyList())));
        // Stats are exposed as a map; a map never equals a list, so the emptiness check
        // has to compare against an empty map to be able to fail.
        assertThat(variant.getStudies().get(0).getStats(), not(is(Collections.emptyMap())));
        assertThat(variant.getStudies().get(0).getFiles(), not(is(Collections.emptyList())));
        assertThat(variant.getStudies().get(0).getSamplesData(), not(is(Collections.emptyList())));
        assertNull(variant.getAnnotation());
    }

    queryResult = query(new Query(), new QueryOptions(QueryOptions.INCLUDE, "annotation"));
    assertEquals(allVariants.getResult().size(), queryResult.getResult().size());
    for (Variant variant : queryResult.getResult()) {
        assertThat(variant.getStudies(), is(Collections.emptyList()));
        assertNotNull(variant.getAnnotation());
    }
}