/** * Returns a map with the original read as a key and the realigned read as the value. * <p> * Missing keys or equivalent key and value pairs mean that the read was not realigned. * </p> * @return never {@code null} */ private Map<GATKSAMRecord,GATKSAMRecord> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final Haplotype refHaplotype, final GenomeLoc paddedReferenceLoc) { final Collection<ReadLikelihoods<Haplotype>.BestAllele> bestAlleles = originalReadLikelihoods.bestAlleles(); final Map<GATKSAMRecord,GATKSAMRecord> result = new HashMap<>(bestAlleles.size()); for (final ReadLikelihoods<Haplotype>.BestAllele bestAllele : bestAlleles) { final GATKSAMRecord originalRead = bestAllele.read; final Haplotype bestHaplotype = bestAllele.allele; final boolean isInformative = bestAllele.isInformative(); final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead, bestHaplotype, refHaplotype, paddedReferenceLoc.getStart(), isInformative); result.put(originalRead,realignedRead); } return result; }
/** * Returns a map with the original read as a key and the realigned read as the value. * <p> * Missing keys or equivalent key and value pairs mean that the read was not realigned. * </p> * @return never {@code null} */ // TODO: migrate from HC -> HCUtils Class and share it! private Map<GATKSAMRecord,GATKSAMRecord> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final Haplotype refHaplotype, final GenomeLoc paddedReferenceLoc) { final Collection<ReadLikelihoods<Haplotype>.BestAllele> bestAlleles = originalReadLikelihoods.bestAlleles(); final Map<GATKSAMRecord,GATKSAMRecord> result = new HashMap<>(bestAlleles.size()); for (final ReadLikelihoods<Haplotype>.BestAllele bestAllele : bestAlleles) { final GATKSAMRecord originalRead = bestAllele.read; final Haplotype bestHaplotype = bestAllele.allele; final boolean isInformative = bestAllele.isInformative(); final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead, bestHaplotype, refHaplotype, paddedReferenceLoc.getStart(), isInformative); result.put(originalRead,realignedRead); } return result; }
/** * Loop over all of the reads in this likelihood map and realign them to its most likely haplotype * @param haplotypes the collection of haplotypes * @param paddedReferenceLoc the active region */ public void realignReadsToMostLikelyHaplotype(final Collection<Haplotype> haplotypes, final GenomeLoc paddedReferenceLoc) { // we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<>(haplotypes.size()); Haplotype refHaplotype = null; for ( final Haplotype haplotype : haplotypes ) { alleleToHaplotypeMap.put(Allele.create(haplotype.getBases()), haplotype); if (refHaplotype == null && haplotype.isReference()) refHaplotype = haplotype; } final Map<GATKSAMRecord, Map<Allele, Double>> newLikelihoodReadMap = new LinkedHashMap<>(likelihoodReadMap.size()); for( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) { final MostLikelyAllele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue()); final GATKSAMRecord alignedToRef = AlignmentUtils.createReadAlignedToRef(entry.getKey(), alleleToHaplotypeMap.get(bestAllele.getMostLikelyAllele()), refHaplotype, paddedReferenceLoc.getStart(), bestAllele.isInformative()); newLikelihoodReadMap.put(alignedToRef, entry.getValue()); } likelihoodReadMap.clear(); likelihoodReadMap.putAll(newLikelihoodReadMap); } }
@Test(dataProvider = "ReadAlignedToRefData", enabled = true) public void testReadAlignedToRef(final GATKSAMRecord read, final Haplotype haplotype, final int refStart, final int expectedReadStart, final String expectedReadCigar) throws Exception { final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination()); final GATKSAMRecord originalReadCopy = (GATKSAMRecord)read.clone(); if ( expectedReadCigar == null ) { Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true)); } else { final Cigar expectedCigar = TextCigarCodec.decode(expectedReadCigar); final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true); Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName()); Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart); Assert.assertEquals(alignedRead.getReadBases(), originalReadCopy.getReadBases()); Assert.assertEquals(alignedRead.getBaseQualities(), originalReadCopy.getBaseQualities()); Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart); Assert.assertEquals(alignedRead.getCigar(), expectedCigar); Assert.assertNotNull(alignedRead.getAttribute("HC")); } Assert.assertEquals(read, originalReadCopy, "createReadAlignedToRef seems be modifying the original read!"); }
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG) public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception { final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination()); final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, new Haplotype(reference.getBytes(),true), 1, true); if ( alignedRead != null ) { final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches; Assert.assertTrue(mismatches <= expectedMaxMismatches, "Alignment of read to ref looks broken. Expected at most " + expectedMaxMismatches + " but saw " + mismatches + " for readBases " + new String(read.getReadBases()) + " with cigar " + read.getCigar() + " reference " + reference + " haplotype " + haplotype + " with cigar " + haplotype.getCigar() + " aligned read cigar " + alignedRead.getCigarString() + " @ " + alignedRead.getAlignmentStart()); } } }