/** * Decode the sites level data from this classes decoder * * @param builder * @return */ private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException { final int contigOffset = decoder.decodeInt(BCF2Type.INT32); final String contig = lookupContigName(contigOffset); builder.chr(contig); this.pos = decoder.decodeInt(BCF2Type.INT32) + 1; // GATK is one based, BCF2 is zero-based final int refLength = decoder.decodeInt(BCF2Type.INT32); builder.start((long)pos); builder.stop((long)(pos + refLength - 1)); // minus one because GATK has closed intervals but BCF2 is open }
/** * Decode the sites level data from this classes decoder * * @param builder * @return */ private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException { final int contigOffset = decoder.decodeInt(BCF2Type.INT32); final String contig = lookupContigName(contigOffset); builder.chr(contig); this.pos = decoder.decodeInt(BCF2Type.INT32) + 1; // GATK is one based, BCF2 is zero-based final int refLength = decoder.decodeInt(BCF2Type.INT32); builder.start((long)pos); builder.stop((long)(pos + refLength - 1)); // minus one because GATK has closed intervals but BCF2 is open }
private void writeVariant(VariantContextWriter writer, GenomeVariant genomeVar) { String nameInFasta = mapContigToFasta(genomeVar.getChrName()); List<Allele> alleles = new ArrayList<Allele>(); int shift = 0; if (genomeVar.getRef().isEmpty() || genomeVar.getAlt().isEmpty()) { shift = -1; String left = fasta.getSubsequenceAt(nameInFasta, genomeVar.getPos(), genomeVar.getPos()) .getBaseString(); alleles.add(Allele.create(left + genomeVar.getRef(), true)); alleles.add(Allele.create(left + genomeVar.getAlt(), false)); } else { alleles.add(Allele.create(genomeVar.getRef(), true)); alleles.add(Allele.create(genomeVar.getAlt(), false)); } VariantContextBuilder builder = new VariantContextBuilder(); builder.chr(genomeVar.getChrName()).start(genomeVar.getPos() + shift + 1) .computeEndFromAlleles(alleles, genomeVar.getPos() + shift + 1).alleles(alleles); writer.add(builder.make()); }
@Test(dataProvider = "ContiguousData") public void testIsContiguous(final String contig, final int pos, final boolean expected) { final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY); final VariantContext testVC = new VariantContextBuilder(vc).chr(contig).start(pos).stop(pos).make(); Assert.assertEquals(band.isContiguous(testVC), expected); }
/** * Decode the sites level data from this classes decoder * * @param builder * @return */ @Requires({"builder != null"}) private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException { final int contigOffset = decoder.decodeInt(BCF2Type.INT32); final String contig = lookupContigName(contigOffset); builder.chr(contig); this.pos = decoder.decodeInt(BCF2Type.INT32) + 1; // GATK is one based, BCF2 is zero-based final int refLength = decoder.decodeInt(BCF2Type.INT32); builder.start((long)pos); builder.stop((long)(pos + refLength - 1)); // minus one because GATK has closed intervals but BCF2 is open }
protected static VariantContextBuilder liftSimpleVariantContext(VariantContext source, Interval target) { if (target == null || source.getReference().length() != target.length()) { return null; } // Build the new variant context. Note: this will copy genotypes, annotations and filters final VariantContextBuilder builder = new VariantContextBuilder(source); builder.chr(target.getContig()); builder.start(target.getStart()); builder.stop(target.getEnd()); return builder; }
protected static VariantContextBuilder liftSimpleVariantContext(VariantContext source, Interval target) { if (target == null || source.getReference().length() != target.length()) { return null; } // Build the new variant context. Note: this will copy genotypes, annotations and filters final VariantContextBuilder builder = new VariantContextBuilder(source); builder.chr(target.getContig()); builder.start(target.getStart()); builder.stop(target.getEnd()); return builder; }
@Test public void TestMergeIntoMNPvalidationDiffContigs() { final String contig = new String("2"); final VariantContext vc = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype2).make(); Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1, vc)); }
@DataProvider(name = "CreateHaplotypeMappingProvider") public Object[][] makeCreateHaplotypeMappingData() { List<Object[]> tests = new ArrayList<Object[]>(); final Set<Haplotype> haplotypes = new HashSet<>(); final Allele ref = Allele.create("A", true); final Allele altC = Allele.create("C", false); final Allele altT = Allele.create("T", false); final Haplotype AtoC1 = new Haplotype("AACAA".getBytes()); final VariantContext vc1 = new VariantContextBuilder().chr("20").start(3).stop(3).alleles(Arrays.asList(ref, altC)).make(); AtoC1.setEventMap(new EventMap(Arrays.asList(vc1))); AtoC1.getEventMap().put(3, vc1); haplotypes.add(AtoC1); final Haplotype AtoC2 = new Haplotype("AAACA".getBytes()); final VariantContext vc2 = new VariantContextBuilder().chr("20").start(4).stop(4).alleles(Arrays.asList(ref, altT)).make(); AtoC2.setEventMap(new EventMap(Arrays.asList(vc2))); AtoC2.getEventMap().put(4, vc2); haplotypes.add(AtoC2); tests.add(new Object[]{vc1, haplotypes, AtoC1}); tests.add(new Object[]{vc2, haplotypes, AtoC2}); tests.add(new Object[]{new VariantContextBuilder().chr("20").start(1).stop(1).alleles(Arrays.asList(ref, altT)).make(), haplotypes, null}); return tests.toArray(new Object[][]{}); }
@Test public void TestAllGenotypesAreUnfilteredAndCalled(){ final VariantContext vc = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(genotype1).make(); Assert.assertTrue(PhasingUtils.allGenotypesAreUnfilteredAndCalled(vc)); }
@Test public void TestMergeIntoMNPvalidationCheckLocBefore() { final VariantContext vc1before = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start+1).stop(start+1).alleles(alleleList1).make(); final VariantContext vc2after = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start).stop(start).alleles(alleleList2).make(); Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc1before, vc2after)); }
@Test public void testBasicGenotypeSummaries() { final String sample1 = "NA1"; final String sample2 = "NA2"; final Allele refAllele = Allele.create("A",true); final Allele altAllele = Allele.create("T"); final Allele noCallAllele = Allele.NO_CALL; final double[] genotypeLikelihoods1 = {30,0,190}; final GenotypesContext testGC = GenotypesContext.create(2); // sample1 -> A/T with GQ 30 testGC.add(new GenotypeBuilder(sample1).alleles(Arrays.asList(refAllele,altAllele)).PL(genotypeLikelihoods1).GQ(30).make()); // sample2 -> ./. with missing GQ testGC.add(new GenotypeBuilder(sample2).alleles(Arrays.asList(noCallAllele, noCallAllele)).make()); final VariantContext testVC = (new VariantContextBuilder()) .alleles(Arrays.asList(refAllele, altAllele)).chr("1").start(15L).stop(15L).genotypes(testGC).make(); final GenotypeSummaries GS = new GenotypeSummaries(); final Map<String,Object> resultMap = GS.annotate(null, null, null, null, testVC, null); Assert.assertEquals(1, resultMap.get(GATKVCFConstants.NOCALL_CHROM_KEY)); // 1 no-called called sample Assert.assertEquals(30.0, Double.parseDouble((String)resultMap.get(GATKVCFConstants.GQ_MEAN_KEY)), 1E-4); // mean GQ is 30 Assert.assertFalse(resultMap.containsKey(GATKVCFConstants.GQ_STDEV_KEY)); // no stddev with only one data point } }
@Test public void TestMergeIntoMNPvalidationFilterNoCall() { final List<String> filters = Arrays.asList("filter"); final List<Allele> alleleList = Arrays.asList(Allele.create("T", true), Allele.create(".", false)); final Genotype genotype = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList).filters(filters).make(); final VariantContext vc = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(genotype).make(); Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc, vc2)); }
@Test public void TestMergeIntoMNPvalidationFiltered() { final List<String> filters = Arrays.asList("filter"); final Genotype genotype = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList1).filters(filters).make(); final VariantContext vc = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(genotype).make(); Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc, vc2)); }
@Test public void TestSomeSampleHasDoubleNonReferenceAlleleTrue(){ Genotype genotype = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList2).make(); VariantContext vc = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype).make(); Assert.assertTrue(PhasingUtils.someSampleHasDoubleNonReferenceAllele(vc1, vc)); }
@Test public void TestMergeIntoMNPvalidationDiffSampleNames() { final Genotype genotype = new GenotypeBuilder().name("sample1").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList1).make(); final VariantContext vc = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(genotype).make(); Assert.assertFalse(PhasingUtils.mergeIntoMNPvalidationCheck(genomeLocParser, vc, vc2)); }
@Test public void TestDoubleAllelesSegregatePerfectlyAmongSamples(){ final Genotype genotype = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).alleles(alleleList2).make(); final VariantContext vc = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype).make(); Assert.assertTrue(PhasingUtils.doubleAllelesSegregatePerfectlyAmongSamples(vc1, vc)); }
protected static VariantContextBuilder reverseComplementVariantContext(final VariantContext source, final Interval target, final ReferenceSequence refSeq) { if (target.isPositiveStrand()) { throw new IllegalArgumentException("This should only be called for negative strand liftovers"); } final List<Allele> origAlleles = new ArrayList<>(source.getAlleles()); final VariantContextBuilder vcb = new VariantContextBuilder(source); vcb.chr(target.getContig()); // By convention, indels are left aligned and include the base prior to that indel. // When reverse complementing, will be necessary to include this additional base. // This check prevents the extremely rare situation in which the indel occurs on the // first base of the sequence final boolean addToStart = source.isIndel() && target.getStart() > 1; final int start = target.getStart() - (addToStart ? 1 : 0); vcb.start(start); final int stop = target.getEnd() - (addToStart ? 1 : 0); vcb.stop(stop); vcb.alleles(reverseComplementAlleles(origAlleles, target, refSeq, source.isIndel(), addToStart)); if (!source.isSNP()) { // check that the reverse complemented bases match the new reference if (!referenceAlleleMatchesReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) { return null; } leftAlignVariant(vcb, start, stop, vcb.getAlleles(), refSeq); } vcb.genotypes(fixGenotypes(source.getGenotypes(), origAlleles, vcb.getAlleles())); return vcb; }
@BeforeSuite public void init() throws FileNotFoundException { referenceFile = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); genomeLocParser = new GenomeLocParser(referenceFile); alleleList1 = Arrays.asList(Allele.create("T", true), Allele.create("C", false)); alleleList2 = Arrays.asList(Allele.create("G", true), Allele.create("A", false)); genotype1 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-1", "10-2"}).attribute("PQ", 100.0).alleles(alleleList1).make(); genotype2 = new GenotypeBuilder().name("sample").attribute("HP", new String[]{"10-2", "10-1"}).attribute("PQ", 200.0).alleles(alleleList2).make(); contig = new String("1"); vc1 = new VariantContextBuilder().chr(contig).id("id1").source("TC").start(start).stop(start).alleles(alleleList1).genotypes(genotype1).make(); vc2 = new VariantContextBuilder().chr(contig).id("id2").source("GA").start(start+1).stop(start+1).alleles(alleleList2).genotypes(genotype2).make(); }