/**
 * Counts the chromosomes in the genotypes that carry a called allele;
 * NO_CALL alleles are not counted.
 *
 * @return chromosome count
 */
public int getCalledChrCount() {
    // An empty sample set is handed to the Set-taking overload.
    return getCalledChrCount(Collections.<String>emptySet());
}
/**
 * Counts the chromosomes in the genotypes that carry a called allele;
 * NO_CALL alleles are not counted.
 *
 * @return chromosome count
 */
public int getCalledChrCount() {
    // An empty sample set is handed to the Set-taking overload.
    return getCalledChrCount(Collections.<String>emptySet());
}
/**
 * Counts the chromosomes in the genotypes that carry the given allele.
 *
 * @param a allele to count
 * @return chromosome count
 */
public int getCalledChrCount(Allele a) {
    // Delegate to the two-argument overload with a fresh, empty sample set.
    final HashSet<String> noSamples = new HashSet<>(0);
    return getCalledChrCount(a, noSamples);
}
/**
 * Counts the chromosomes in the genotypes that carry the given allele.
 *
 * @param a allele to count
 * @return chromosome count
 */
public int getCalledChrCount(Allele a) {
    // Delegate to the two-argument overload with a fresh, empty sample set.
    final HashSet<String> noSamples = new HashSet<String>(0);
    return getCalledChrCount(a, noSamples);
}
/**
 * Counts the chromosomes in the genotypes that carry the given allele.
 *
 * @param a allele to count
 * @return chromosome count
 */
public int getCalledChrCount(Allele a) {
    // Delegate to the two-argument overload with a fresh, empty sample set.
    final HashSet<String> noSamples = new HashSet<>(0);
    return getCalledChrCount(a, noSamples);
}
/**
 * Counts the chromosomes in the genotypes that carry a called allele;
 * NO_CALL alleles are not counted.
 *
 * @return chromosome count
 */
public int getCalledChrCount() {
    // An empty sample set is handed to the Set-taking overload.
    return getCalledChrCount(Collections.<String>emptySet());
}
/**
 * Genotype-specific function: reports whether the genotypes are monomorphic with respect to
 * the alleles segregating at this site. The answer is true when this context is not variant,
 * or when genotypes are present and the called chromosome count of the reference allele
 * equals the count of all called chromosomes. The answer is stored in {@code monomorphic}
 * the first time it is computed and read back from there on later calls.
 *
 * @return true if it's monomorphic
 */
public boolean isMonomorphicInSamples() {
    // Already computed on an earlier call.
    if (monomorphic != null) {
        return monomorphic;
    }

    if (!isVariant()) {
        // Non-variant contexts are monomorphic without looking at the genotypes.
        monomorphic = true;
    } else {
        monomorphic = hasGenotypes() && getCalledChrCount(getReference()) == getCalledChrCount();
    }
    return monomorphic;
}
/**
 * Genotype-specific function: reports whether the genotypes are monomorphic with respect to
 * the alleles segregating at this site. The answer is true when this context is not variant,
 * or when genotypes are present and the called chromosome count of the reference allele
 * equals the count of all called chromosomes. The answer is stored in {@code monomorphic}
 * the first time it is computed and read back from there on later calls.
 *
 * @return true if it's monomorphic
 */
public boolean isMonomorphicInSamples() {
    // Already computed on an earlier call.
    if (monomorphic != null) {
        return monomorphic;
    }

    if (!isVariant()) {
        // Non-variant contexts are monomorphic without looking at the genotypes.
        monomorphic = true;
    } else {
        monomorphic = hasGenotypes() && getCalledChrCount(getReference()) == getCalledChrCount();
    }
    return monomorphic;
}
/**
 * Genotype-specific function: reports whether the genotypes are monomorphic with respect to
 * the alleles segregating at this site. The answer is true when this context is not variant,
 * or when genotypes are present and the called chromosome count of the reference allele
 * equals the count of all called chromosomes. The answer is stored in {@code monomorphic}
 * the first time it is computed and read back from there on later calls.
 *
 * @return true if it's monomorphic
 */
public boolean isMonomorphicInSamples() {
    // Already computed on an earlier call.
    if (monomorphic != null) {
        return monomorphic;
    }

    if (!isVariant()) {
        // Non-variant contexts are monomorphic without looking at the genotypes.
        monomorphic = true;
    } else {
        monomorphic = hasGenotypes() && getCalledChrCount(getReference()) == getCalledChrCount();
    }
    return monomorphic;
}
public Allele getAltAlleleWithHighestAlleleCount() { // optimization: for bi-allelic sites, just return the 1only alt allele if ( isBiallelic() ) return getAlternateAllele(0); Allele best = null; int maxAC1 = 0; for ( Allele a : getAlternateAlleles() ) { final int ac = getCalledChrCount(a); if ( ac >= maxAC1 ) { maxAC1 = ac; best = a; } } return best; }
/**
 * Computes the Hardy-Weinberg value for a variant context from its hom-ref, het and hom-var
 * genotype counts.
 *
 * @param vc variant context supplying the genotype counts
 * @return 0.0 when the context has no called chromosomes, otherwise the value returned by
 *         {@code HardyWeinbergCalculation.hwCalculate}
 */
public static double computeHardyWeinbergPvalue(VariantContext vc) {
    final boolean hasCalledChromosomes = vc.getCalledChrCount() != 0;
    if (hasCalledChromosomes) {
        return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount());
    }
    // Nothing was called, so there is nothing to feed the calculation.
    return 0.0;
}
private Pair<Integer, Integer> estNumberOfEvaluations(final AFCalculatorTestBuilder testBuilder, final VariantContext vc, final int nonTypePL) { final int evalOverhead = 2; // 2 final int maxEvalsPerSamplePerAC = 3; int minEvals = 0, maxEvals = 0; for ( final Allele alt : vc.getAlternateAlleles() ) { final int AC = vc.getCalledChrCount(alt); minEvals += AC + evalOverhead; // everyone is hom-var maxEvals += AC * maxEvalsPerSamplePerAC + 10; } return new Pair<Integer, Integer>(minEvals, maxEvals); }
// Keep in AC the larger of its current value and eval.getCalledChrCount(allele).
AC = Math.max(AC, eval.getCalledChrCount(allele));
/**
 * A context built with only the reference allele must be typed NO_VARIATION, must report no
 * highest-count alternate allele, and must count only the called reference chromosomes.
 */
@Test
public void testMonomorphicVariant() {
    final Genotype homRef = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    final Genotype halfCalled = GenotypeBuilder.create("BB", Arrays.asList(Aref, Allele.NO_CALL));
    final Genotype uncalled = GenotypeBuilder.create("CC", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
    final GenotypesContext genotypes = GenotypesContext.create(homRef, halfCalled, uncalled);

    final VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Collections.singletonList(Aref))
            .genotypes(genotypes)
            .make();

    Assert.assertEquals(vc.getType(), VariantContext.Type.NO_VARIATION);
    Assert.assertNull(vc.getAltAlleleWithHighestAlleleCount());
    // Two reference chromosomes from "AA" plus the single called one from "BB".
    Assert.assertEquals(vc.getCalledChrCount(Aref), 3);
}
/**
 * Builds a three-allele site with six samples (one of them entirely uncalled) and checks
 * the genotype accessors and the per-allele called chromosome counts.
 * <p>
 * All {@code assertEquals} calls pass the observed value first and the expected value
 * second, matching the other assertions in this test.
 */
@Test
public void testAccessingCompleteGenotypes() {
    List<Allele> alleles = Arrays.asList(Aref, T, ATC);

    Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    Genotype g3 = GenotypeBuilder.create("TT", Arrays.asList(T, T));
    Genotype g4 = GenotypeBuilder.create("Td", Arrays.asList(T, ATC));
    Genotype g5 = GenotypeBuilder.create("dd", Arrays.asList(ATC, ATC));
    Genotype g6 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));

    VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles)
            .genotypes(g1, g2, g3, g4, g5, g6).make();

    Assert.assertTrue(vc.hasGenotypes());
    Assert.assertFalse(vc.isMonomorphicInSamples());
    Assert.assertTrue(vc.isPolymorphicInSamples());
    Assert.assertEquals(vc.getGenotypes().size(), 6);

    // Sub-setting by sample name returns one genotype per requested sample.
    Assert.assertEquals(vc.getGenotypes(Arrays.asList("AA", "Td", "dd")).size(), 3);

    // Five called diploid samples give 10 called chromosomes; ".." adds the 2 NO_CALLs.
    Assert.assertEquals(vc.getCalledChrCount(), 10);
    Assert.assertEquals(vc.getCalledChrCount(Aref), 3);
    Assert.assertEquals(vc.getCalledChrCount(T), 4);
    Assert.assertEquals(vc.getCalledChrCount(ATC), 3);
    Assert.assertEquals(vc.getCalledChrCount(Allele.NO_CALL), 2);

    // T (count 4) beats ATC (count 3).
    Assert.assertEquals(vc.getAltAlleleWithHighestAlleleCount(), T);
}
@Test public void testAccessingRefGenotypes() { List<Allele> alleles1 = Arrays.asList(Aref, T); List<Allele> alleles2 = Arrays.asList(Aref); List<Allele> alleles3 = Arrays.asList(Aref, T); for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) { Genotype g1 = GenotypeBuilder.create("AA1", Arrays.asList(Aref, Aref)); Genotype g2 = GenotypeBuilder.create("AA2", Arrays.asList(Aref, Aref)); Genotype g3 = GenotypeBuilder.create("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)); VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles) .genotypes(g1, g2, g3).make(); Assert.assertTrue(vc.hasGenotypes()); Assert.assertTrue(vc.isMonomorphicInSamples()); Assert.assertFalse(vc.isPolymorphicInSamples()); Assert.assertEquals(vc.getGenotypes().size(), 3); Assert.assertEquals(4, vc.getCalledChrCount()); Assert.assertEquals(4, vc.getCalledChrCount(Aref)); Assert.assertEquals(0, vc.getCalledChrCount(T)); Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL)); //bi allelic, only one alt allele Allele expected; if (alleles.size()>1) { expected = alleles.get(1); } else { expected = null; } Assert.assertEquals( vc.getAltAlleleWithHighestAlleleCount(), expected); } }
@Test public void testCreatingPartiallyCalledGenotype() { List<Allele> alleles = Arrays.asList(Aref, C); Genotype g = GenotypeBuilder.create("foo", Arrays.asList(C, Allele.NO_CALL)); VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, alleles).genotypes(g).make(); Assert.assertTrue(vc.isSNP()); Assert.assertEquals(vc.getNAlleles(), 2); Assert.assertTrue(vc.hasGenotypes()); Assert.assertFalse(vc.isMonomorphicInSamples()); Assert.assertTrue(vc.isPolymorphicInSamples()); Assert.assertEquals(vc.getGenotype("foo"), g); Assert.assertEquals(vc.getCalledChrCount(), 1); // we only have 1 called chromosomes, we exclude the NO_CALL one isn't called Assert.assertEquals(vc.getCalledChrCount(Aref), 0); Assert.assertEquals(vc.getCalledChrCount(C), 1); Assert.assertFalse(vc.getGenotype("foo").isHet()); Assert.assertFalse(vc.getGenotype("foo").isHom()); Assert.assertFalse(vc.getGenotype("foo").isNoCall()); Assert.assertFalse(vc.getGenotype("foo").isHom()); Assert.assertTrue(vc.getGenotype("foo").isMixed()); Assert.assertEquals(vc.getGenotype("foo").getType(), GenotypeType.MIXED); }
/**
 * Builds a bi-allelic SNP site with one hom-ref, one het and one hom-var sample and checks
 * genotype lookup by sample name, sample-name membership, and called chromosome counts.
 */
@Test
public void testAccessingSimpleSNPGenotypes() {
    final List<Allele> siteAlleles = Arrays.asList(Aref, T);

    final Genotype homRef = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
    final Genotype het = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
    final Genotype homVar = GenotypeBuilder.create("TT", Arrays.asList(T, T));

    final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, siteAlleles)
            .genotypes(homRef, het, homVar)
            .make();

    Assert.assertTrue(vc.hasGenotypes());
    Assert.assertFalse(vc.isMonomorphicInSamples());
    Assert.assertTrue(vc.isPolymorphicInSamples());
    Assert.assertEquals(vc.getSampleNames().size(), 3);

    // Each genotype is reachable both through the genotypes collection and directly.
    Assert.assertEquals(vc.getGenotypes().size(), 3);
    Assert.assertEquals(vc.getGenotypes().get("AA"), homRef);
    Assert.assertEquals(vc.getGenotype("AA"), homRef);
    Assert.assertEquals(vc.getGenotypes().get("AT"), het);
    Assert.assertEquals(vc.getGenotype("AT"), het);
    Assert.assertEquals(vc.getGenotypes().get("TT"), homVar);
    Assert.assertEquals(vc.getGenotype("TT"), homVar);

    // Known sample names are found.
    Assert.assertTrue(vc.hasGenotype("AA"));
    Assert.assertTrue(vc.hasGenotype("AT"));
    Assert.assertTrue(vc.hasGenotype("TT"));

    // Unknown names, including lower-case variants of known ones, are not found.
    Assert.assertFalse(vc.hasGenotype("foo"));
    Assert.assertFalse(vc.hasGenotype("TTT"));
    Assert.assertFalse(vc.hasGenotype("at"));
    Assert.assertFalse(vc.hasGenotype("tt"));

    // Three diploid samples: 6 called chromosomes, split evenly between the two alleles.
    Assert.assertEquals(vc.getCalledChrCount(), 6);
    Assert.assertEquals(vc.getCalledChrCount(Aref), 3);
    Assert.assertEquals(vc.getCalledChrCount(T), 3);
}
private void testCalling(final AFCalculatorTestBuilder testBuilder, final List<Integer> ACs, final int nonTypePL, final List<Boolean> expectedPoly) { final AFCalculator calc = testBuilder.makeModel(); final double[] priors = testBuilder.makePriors(); final VariantContext vc = testBuilder.makeACTest(ACs, 0, nonTypePL); final AFCalculationResult result = calc.getLog10PNonRef(vc, PLOIDY, testBuilder.numAltAlleles, priors); boolean anyPoly = false; for ( final boolean onePoly : expectedPoly ) anyPoly = anyPoly || onePoly; if ( anyPoly ) Assert.assertTrue(result.getLog10PosteriorOfAFGT0() > -1); for ( int altI = 1; altI < result.getAllelesUsedInGenotyping().size(); altI++ ) { final int i = altI - 1; final Allele alt = result.getAllelesUsedInGenotyping().get(altI); // must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt)); Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt)); } } }
protected double calculateIC(final VariantContext vc, final Allele altAllele) { final int AN = vc.getCalledChrCount(); final double altAF; final double hetCount = heterozygosityUtils.getHetCount(vc, altAllele); final double F; //shortcut to get a value closer to the non-alleleSpecific value for bialleleics if (vc.isBiallelic()) { double refAC = heterozygosityUtils.getAlleleCount(vc, vc.getReference()); double altAC = heterozygosityUtils.getAlleleCount(vc, altAllele); double refAF = refAC/(altAC+refAC); altAF = 1 - refAF; F = 1.0 - (hetCount / (2.0 * refAF * altAF * (double) heterozygosityUtils.getSampleCount())); // inbreeding coefficient } else { //compare number of hets for this allele (and any other second allele) with the expectation based on AFs //derive the altAF from the likelihoods to account for any accumulation of fractional counts from non-primary likelihoods, //e.g. for a GQ10 variant, the probability of the call will be ~0.9 and the second best call will be ~0.1 so adding up those 0.1s for het counts can dramatically change the AF compared with integer counts altAF = heterozygosityUtils.getAlleleCount(vc, altAllele)/ (double) AN; F = 1.0 - (hetCount / (2.0 * (1 - altAF) * altAF * (double) heterozygosityUtils.getSampleCount())); // inbreeding coefficient } return F; } }