public Map<String, Object> annotate(final RefMetaDataTracker tracker, final AnnotatorCompatible walker, final ReferenceContext ref, final Map<String, AlignmentContext> stratifiedContexts, final VariantContext vc, final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) { if ( ! vc.hasGenotypes() ) return null; //if none of the "founders" are in the vc samples, assume we uniquified the samples upstream and they are all founders if (!didUniquifiedSampleNameCheck) { checkSampleNames(vc); didUniquifiedSampleNameCheck = true; } return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true,founderIds); }
/**
 * Reports whether the first variant context in {@code vcs} has genotypes.
 *
 * Only element 0 is inspected; the remaining contexts are not consulted.
 *
 * @return the value of {@code hasGenotypes()} on the first element of {@code vcs}
 */
public boolean hasGenotypes() {
    final VariantContext first = vcs.get(0);
    return first.hasGenotypes();
}
/**
 * Produces a comma-separated list of the samples whose genotype is called and not hom-ref.
 *
 * Genotypes are visited in the order given by {@code vc.getGenotypesOrderedByName()}, and the result is
 * stored under the first key returned by {@code getKeyNames()}.
 *
 * @param tracker                              not read by this method
 * @param walker                               not read by this method
 * @param ref                                  not read by this method
 * @param stratifiedContexts                   not read by this method
 * @param vc                                   the variant context to annotate
 * @param stratifiedPerReadAlleleLikelihoodMap not read by this method
 * @return {@code null} if {@code vc} is monomorphic in its samples, has no genotypes, or yields an empty
 *         sample list; otherwise a single-entry map holding the joined sample names
 */
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
                                    final AnnotatorCompatible walker,
                                    final ReferenceContext ref,
                                    final Map<String, AlignmentContext> stratifiedContexts,
                                    final VariantContext vc,
                                    final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
    if ( vc.isMonomorphicInSamples() )
        return null;
    if ( !vc.hasGenotypes() )
        return null;

    final StringBuilder joinedNames = new StringBuilder();
    for ( final Genotype g : vc.getGenotypesOrderedByName() ) {
        // skip uncalled and hom-ref genotypes
        if ( !g.isCalled() || g.isHomRef() )
            continue;

        if ( joinedNames.length() > 0 )
            joinedNames.append(",");
        joinedNames.append(g.getSampleName());
    }

    if ( joinedNames.length() == 0 )
        return null;

    final Map<String, Object> annotations = new HashMap<String, Object>();
    annotations.put(getKeyNames().get(0), joinedNames.toString());
    return annotations;
}
public void validateAlternateAlleles() { if ( !hasGenotypes() ) return; List<Allele> reportedAlleles = getAlleles(); Set<Allele> observedAlleles = new HashSet<Allele>(); observedAlleles.add(getReference()); for ( final Genotype g : getGenotypes() ) { if ( g.isCalled() ) observedAlleles.addAll(g.getAlleles()); } if ( observedAlleles.contains(Allele.NO_CALL) ) observedAlleles.remove(Allele.NO_CALL); if ( reportedAlleles.size() != observedAlleles.size() ) throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getChr(), getStart())); int originalSize = reportedAlleles.size(); // take the intersection and see if things change observedAlleles.retainAll(reportedAlleles); if ( observedAlleles.size() != originalSize ) throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getChr(), getStart())); }
@Override public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( comp != null ) { // we only need to consider sites in comp if ( REQUIRE_IDENTICAL_ALLELES && (eval != null && haveDifferentAltAlleles(eval, comp))) nDifferentAlleleSites++; else { SiteStatus evalStatus = calcSiteStatus(eval); final Set<String> evalSamples = getWalker().getSampleNamesForEvaluation(); if ( comp.hasGenotypes() && ! evalSamples.isEmpty() && comp.hasGenotypes(evalSamples) ) // if we have genotypes in both eval and comp, subset comp down just the samples in eval comp = comp.subContextFromSamples(evalSamples, false); SiteStatus compStatus = calcSiteStatus(comp); counts[compStatus.ordinal()][evalStatus.ordinal()]++; } } }
/**
 * Genotype-specific function: are the genotypes monomorphic with respect to the alleles segregating at
 * this site? That is, is the number of alternate alleles among all of the genotypes zero?
 *
 * The answer is computed on the first call and cached in {@code monomorphic}. It is {@code true} when
 * the site is not variant, or when there are genotypes and every called chromosome carries the
 * reference allele.
 *
 * @return true if it's monomorphic
 */
public boolean isMonomorphicInSamples() {
    if ( monomorphic == null ) {
        final boolean notVariant = ! isVariant();
        // the genotype-based test is only evaluated for variant sites
        monomorphic = notVariant
                || (hasGenotypes() && getCalledChrCount(getReference()) == getCalledChrCount());
    }
    return monomorphic;
}
/**
 * Genotype-specific function: are the genotypes monomorphic with respect to the alleles segregating at
 * this site? That is, is the number of alternate alleles among all of the genotypes zero?
 *
 * The answer is computed on the first call and cached in {@code monomorphic}. It is {@code true} when
 * the site is not variant, or when there are genotypes and every called chromosome carries the
 * reference allele.
 *
 * @return true if it's monomorphic
 */
public boolean isMonomorphicInSamples() {
    if ( monomorphic == null ) {
        final boolean notVariant = ! isVariant();
        // the genotype-based test is only evaluated for variant sites
        monomorphic = notVariant
                || (hasGenotypes() && getCalledChrCount(getReference()) == getCalledChrCount());
    }
    return monomorphic;
}
/**
 * Genotype-specific function: are the genotypes monomorphic with respect to the alleles segregating at
 * this site? That is, is the number of alternate alleles among all of the genotypes zero?
 *
 * The answer is computed on the first call and cached in {@code monomorphic}. It is {@code true} when
 * the site is not variant, or when there are genotypes and every called chromosome carries the
 * reference allele.
 *
 * @return true if it's monomorphic
 */
public boolean isMonomorphicInSamples() {
    if ( monomorphic == null ) {
        final boolean notVariant = ! isVariant();
        // the genotype-based test is only evaluated for variant sites
        monomorphic = notVariant
                || (hasGenotypes() && getCalledChrCount(getReference()) == getCalledChrCount());
    }
    return monomorphic;
}
/**
 * Decides whether a training record can be used for the given evaluation record.
 *
 * The training record must be non-null, not filtered, variant, and pass
 * {@code checkVariationClass(evalVC, trainVC)}. Beyond that it is accepted when
 * {@code TRUST_ALL_POLYMORPHIC} is set, when it has no genotypes, or when it is polymorphic in its samples.
 *
 * @param evalVC                the evaluation record, passed to {@code checkVariationClass}
 * @param trainVC               the training record, may be {@code null}
 * @param TRUST_ALL_POLYMORPHIC if true, skip the genotype-based polymorphism requirement
 * @return true if {@code trainVC} satisfies all of the conditions above
 */
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
    if ( trainVC == null )
        return false;
    if ( !trainVC.isNotFiltered() )
        return false;
    if ( !trainVC.isVariant() )
        return false;
    if ( !checkVariationClass( evalVC, trainVC ) )
        return false;

    if ( TRUST_ALL_POLYMORPHIC )
        return true;
    return !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples();
}
/**
 * Creates test data from a header and a list of variant contexts.
 *
 * The sample names of every context that has genotypes are collected. If any were found, a new
 * {@code VCFHeader} is built from the given header's sorted metadata plus those names; otherwise the
 * given header is stored as is.
 *
 * @param header the header to use, or to take metadata from
 * @param vcs    the variant contexts; stored by reference, not copied
 */
public VariantContextTestData(final VCFHeader header, final List<VariantContext> vcs) {
    final Set<String> sampleNames = new HashSet<String>();
    for ( final VariantContext context : vcs ) {
        if ( !context.hasGenotypes() )
            continue;
        sampleNames.addAll(context.getSampleNames());
    }

    if ( sampleNames.isEmpty() ) {
        this.header = header;
    } else {
        this.header = new VCFHeader(header.getMetaDataInSortedOrder(), sampleNames);
    }
    this.vcs = vcs;
}
private SiteStatus calcSiteStatus(VariantContext vc) { if ( vc == null ) return SiteStatus.NO_CALL; if ( vc.isFiltered() ) return SiteStatus.FILTERED; if ( vc.isMonomorphicInSamples() ) return SiteStatus.MONO; if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphicInSamples was false and there are genotypes if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { int ac = 0; if ( vc.getNAlleles() > 2 ) { return SiteStatus.POLY; } else ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0); return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO; } else { return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do } }
/**
 * Builds a genotype-free context at the insertion locus with alleles {@code Tref} and {@code ATC} and
 * checks that it is typed as an INDEL that is neither a simple insertion nor a simple deletion.
 */
@Test
public void testCreatingComplexSubstitutionVariantContext() {
    final List<Allele> substitutionAlleles = Arrays.asList(Tref, ATC);
    final VariantContext substitution =
            new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, substitutionAlleles).make();

    // location
    Assert.assertEquals(substitution.getContig(), insLoc);
    Assert.assertEquals(substitution.getStart(), insLocStart);
    Assert.assertEquals(substitution.getEnd(), insLocStop);

    // variant classification
    Assert.assertEquals(substitution.getType(), VariantContext.Type.INDEL);
    Assert.assertFalse(substitution.isSNP());
    Assert.assertTrue(substitution.isIndel());
    Assert.assertFalse(substitution.isSimpleInsertion());
    Assert.assertFalse(substitution.isSimpleDeletion());
    Assert.assertFalse(substitution.isSimpleIndel());
    Assert.assertFalse(substitution.isMixed());
    Assert.assertTrue(substitution.isBiallelic());
    Assert.assertEquals(substitution.getNAlleles(), 2);

    // alleles
    Assert.assertEquals(substitution.getReference(), Tref);
    Assert.assertEquals(substitution.getAlleles().size(), 2);
    Assert.assertEquals(substitution.getAlternateAlleles().size(), 1);
    Assert.assertEquals(substitution.getAlternateAllele(0), ATC);

    // no genotypes were supplied
    Assert.assertFalse(substitution.hasGenotypes());
    Assert.assertEquals(substitution.getSampleNames().size(), 0);
}
@Test public void testCreatingRefVariantContext() { List<Allele> alleles = Arrays.asList(Aref); VariantContext vc = snpBuilder.alleles(alleles).make(); Assert.assertEquals(vc.getContig(), snpLoc); Assert.assertEquals(vc.getStart(), snpLocStart); Assert.assertEquals(vc.getEnd(), snpLocStop); Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType()); Assert.assertFalse(vc.isSNP()); Assert.assertFalse(vc.isIndel()); Assert.assertFalse(vc.isSimpleInsertion()); Assert.assertFalse(vc.isSimpleDeletion()); Assert.assertFalse(vc.isSimpleIndel()); Assert.assertFalse(vc.isMixed()); Assert.assertFalse(vc.isBiallelic()); Assert.assertEquals(vc.getNAlleles(), 1); Assert.assertEquals(vc.getReference(), Aref); Assert.assertEquals(vc.getAlleles().size(), 1); Assert.assertEquals(vc.getAlternateAlleles().size(), 0); //Assert.assertEquals(vc.getAlternateAllele(0), T); Assert.assertFalse(vc.hasGenotypes()); Assert.assertEquals(vc.getSampleNames().size(), 0); }
/**
 * Builds a genotype-free context at the deletion locus with alleles {@code ATCref} and {@code del} and
 * checks that it is typed as an INDEL that is a simple deletion.
 */
@Test
public void testCreatingDeletionVariantContext() {
    final List<Allele> deletionAlleles = Arrays.asList(ATCref, del);
    final VariantContext deletion =
            new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, deletionAlleles).make();

    // location
    Assert.assertEquals(deletion.getContig(), delLoc);
    Assert.assertEquals(deletion.getStart(), delLocStart);
    Assert.assertEquals(deletion.getEnd(), delLocStop);

    // variant classification
    Assert.assertEquals(deletion.getType(), VariantContext.Type.INDEL);
    Assert.assertFalse(deletion.isSNP());
    Assert.assertTrue(deletion.isIndel());
    Assert.assertFalse(deletion.isSimpleInsertion());
    Assert.assertTrue(deletion.isSimpleDeletion());
    Assert.assertTrue(deletion.isSimpleIndel());
    Assert.assertFalse(deletion.isMixed());
    Assert.assertTrue(deletion.isBiallelic());
    Assert.assertEquals(deletion.getNAlleles(), 2);

    // alleles
    Assert.assertEquals(deletion.getReference(), ATCref);
    Assert.assertEquals(deletion.getAlleles().size(), 2);
    Assert.assertEquals(deletion.getAlternateAlleles().size(), 1);
    Assert.assertEquals(deletion.getAlternateAllele(0), del);

    // no genotypes were supplied
    Assert.assertFalse(deletion.hasGenotypes());
    Assert.assertEquals(deletion.getSampleNames().size(), 0);
}
/**
 * Builds a genotype-free context from {@code insBuilder} with alleles {@code delRef} and {@code ATC}
 * and checks that it is typed as an INDEL that is a simple insertion.
 */
@Test
public void testCreatingInsertionVariantContext() {
    final List<Allele> insertionAlleles = Arrays.asList(delRef, ATC);
    final VariantContext insertion = insBuilder.alleles(insertionAlleles).make();

    // location
    Assert.assertEquals(insertion.getContig(), insLoc);
    Assert.assertEquals(insertion.getStart(), insLocStart);
    Assert.assertEquals(insertion.getEnd(), insLocStop);

    // variant classification
    Assert.assertEquals(insertion.getType(), VariantContext.Type.INDEL);
    Assert.assertFalse(insertion.isSNP());
    Assert.assertTrue(insertion.isIndel());
    Assert.assertTrue(insertion.isSimpleInsertion());
    Assert.assertFalse(insertion.isSimpleDeletion());
    Assert.assertTrue(insertion.isSimpleIndel());
    Assert.assertFalse(insertion.isMixed());
    Assert.assertTrue(insertion.isBiallelic());
    Assert.assertEquals(insertion.getNAlleles(), 2);

    // alleles
    Assert.assertEquals(insertion.getReference(), delRef);
    Assert.assertEquals(insertion.getAlleles().size(), 2);
    Assert.assertEquals(insertion.getAlternateAlleles().size(), 1);
    Assert.assertEquals(insertion.getAlternateAllele(0), ATC);

    // no genotypes were supplied
    Assert.assertFalse(insertion.hasGenotypes());
    Assert.assertEquals(insertion.getSampleNames().size(), 0);
}
/**
 * Asserts that a variant context sits at the expected location and has the expected shape.
 *
 * When {@code nonRef} is true, the only further check is that the first alternate allele is not
 * {@code GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE}. Otherwise the context must have exactly two alleles
 * with the symbolic allele as its alternate, an END attribute equal to {@code stop}, and a single
 * genotype for {@code SAMPLE_NAME} that has likelihoods, a three-element PL, DP and GQ, but no AD.
 *
 * @param vc     the context to check
 * @param contig expected contig name
 * @param start  expected start position
 * @param stop   expected end position (and expected END attribute in the {@code !nonRef} case)
 * @param nonRef selects which of the two sets of checks described above is applied
 */
private void assertGoodVC(final VariantContext vc, final String contig, final int start, final int stop, final boolean nonRef) {
    Assert.assertEquals(vc.getChr(), contig);
    Assert.assertEquals(vc.getStart(), start);
    Assert.assertEquals(vc.getEnd(), stop);

    if ( nonRef ) {
        Assert.assertNotEquals(vc.getAlternateAllele(0), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
    } else {
        Assert.assertEquals(vc.getNAlleles(), 2);
        Assert.assertEquals(vc.getAlternateAllele(0), GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
        Assert.assertEquals(vc.getAttributeAsInt(VCFConstants.END_KEY, -1), stop);

        Assert.assertTrue(vc.hasGenotypes());
        Assert.assertTrue(vc.hasGenotype(SAMPLE_NAME));
        Assert.assertEquals(vc.getGenotypes().size(), 1);

        final Genotype g = vc.getGenotype(SAMPLE_NAME);
        Assert.assertFalse(g.hasAD());
        Assert.assertTrue(g.hasLikelihoods());
        Assert.assertTrue(g.hasPL());
        // compare the length itself so that a failure reports the actual number of PL values
        Assert.assertEquals(g.getPL().length, 3);
        Assert.assertTrue(g.hasDP());
        Assert.assertTrue(g.hasGQ());
    }
}
/**
 * Builds a three-allele context (Aref, T, ATC) with six diploid genotypes, one of them a full no-call,
 * and checks genotype access and the called-chromosome counts.
 *
 * Expected counts follow from the genotypes below: Aref appears 3 times (AA, AT), T 4 times
 * (AT, TT, Td), ATC 3 times (Td, dd) and NO_CALL twice (".."), giving 10 called chromosomes.
 */
@Test
public void testAccessingCompleteGenotypes() {
    List<Allele> alleles = Arrays.asList(Aref, T, ATC);

    Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
    Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC));
    Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC));
    Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));

    VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
            .genotypes(g1, g2, g3, g4, g5, g6).make();

    Assert.assertTrue(vc.hasGenotypes());
    Assert.assertFalse(vc.isMonomorphicInSamples());
    Assert.assertTrue(vc.isPolymorphicInSamples());
    Assert.assertEquals(vc.getGenotypes().size(), 6);

    // All assertions below pass (actual, expected), matching the rest of the test and ensuring
    // that a failure message labels the two values correctly.
    Assert.assertEquals(vc.getGenotypes(Arrays.asList("AA", "Td", "dd")).size(), 3);

    Assert.assertEquals(vc.getCalledChrCount(), 10);
    Assert.assertEquals(vc.getCalledChrCount(Aref), 3);
    Assert.assertEquals(vc.getCalledChrCount(T), 4);
    Assert.assertEquals(vc.getCalledChrCount(ATC), 3);
    Assert.assertEquals(vc.getCalledChrCount(Allele.NO_CALL), 2);

    Assert.assertEquals(vc.getAltAlleleWithHighestAlleleCount(), T);
}
@Test public void testCreatingPartiallyCalledGenotype() { List<Allele> alleles = Arrays.asList(Aref, C); Genotype g = GenotypeBuilder.create("foo", Arrays.asList(C, Allele.NO_CALL)); VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g).make(); Assert.assertTrue(vc.isSNP()); Assert.assertEquals(vc.getNAlleles(), 2); Assert.assertTrue(vc.hasGenotypes()); Assert.assertFalse(vc.isMonomorphicInSamples()); Assert.assertTrue(vc.isPolymorphicInSamples()); Assert.assertEquals(vc.getGenotype("foo"), g); Assert.assertEquals(vc.getCalledChrCount(), 1); // we only have 1 called chromosomes, we exclude the NO_CALL one isn't called Assert.assertEquals(vc.getCalledChrCount(Aref), 0); Assert.assertEquals(vc.getCalledChrCount(C), 1); Assert.assertFalse(vc.getGenotype("foo").isHet()); Assert.assertFalse(vc.getGenotype("foo").isHom()); Assert.assertFalse(vc.getGenotype("foo").isNoCall()); Assert.assertFalse(vc.getGenotype("foo").isHom()); Assert.assertTrue(vc.getGenotype("foo").isMixed()); Assert.assertEquals(vc.getGenotype("foo").getType(), GenotypeType.MIXED); }
/**
 * Asserts that two variant contexts agree field by field.
 *
 * Compared in order: location (chr, start, end), ID, alleles, attributes, filter state and filters,
 * phred-scaled quality, and whether genotypes are present. If {@code expected} has genotypes, the
 * sample names (as a set and in name order) and each per-sample genotype are compared as well.
 *
 * @param actual   the context under test; asserted to be non-null
 * @param expected the reference context
 */
public static void assertVariantContextsAreEqual( final VariantContext actual, final VariantContext expected ) {
    Assert.assertNotNull(actual, "VariantContext expected not null");

    // location and identity
    Assert.assertEquals(actual.getChr(), expected.getChr(), "chr");
    Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
    Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
    Assert.assertEquals(actual.getID(), expected.getID(), "id");
    Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual);

    // site-level annotations
    assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
    Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
    Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered");
    assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
    assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());

    // genotypes
    Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes(), "hasGenotypes");
    if ( !expected.hasGenotypes() )
        return;

    assertEqualsSet(actual.getSampleNames(), expected.getSampleNames(), "sample names set");
    Assert.assertEquals(actual.getSampleNamesOrderedByName(), expected.getSampleNamesOrderedByName(), "sample names");

    final Set<String> expectedSamples = expected.getSampleNames();
    for ( final String sampleName : expectedSamples ) {
        assertGenotypesAreEqual(actual.getGenotype(sampleName), expected.getGenotype(sampleName));
    }
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) return 0; Collection<VariantContext> contexts = getVariantContexts(tracker, ref); for ( VariantContext vc : contexts ) { VariantContextBuilder builder = new VariantContextBuilder(vc); // set the appropriate sample name if necessary if ( sampleName != null && vc.hasGenotypes() && vc.hasGenotype(variants.getName()) ) { Genotype g = new GenotypeBuilder(vc.getGenotype(variants.getName())).name(sampleName).make(); builder.genotypes(g); } final VariantContext withID = variantOverlapAnnotator.annotateRsID(tracker, builder.make()); writeRecord(withID, tracker, ref.getLocus()); } return 1; }