/** * Set the cigar of this haplotype to cigar. * * Note that this function consolidates the cigar, so that 1M1M1I1M1M => 2M1I2M * * @param cigar a cigar whose readLength == length() */ public void setCigar( final Cigar cigar ) { this.cigar = AlignmentUtils.consolidateCigar(cigar); if ( this.cigar.getReadLength() != length() ) throw new IllegalArgumentException("Read length " + length() + " not equal to the read length of the cigar " + cigar.getReadLength() + " " + this.cigar); }
@Override protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) { final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true ); if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) return null; // If the offset inside a deletion, it does not lie on a read. if ( AlignmentUtils.isInsideDeletion(read.getCigar(), offset) ) { return INVALID_ELEMENT_FROM_READ; } int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 ); final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1); return (double)readPos; }
/** * Counts of observed bases at a genomic position e.g. {13,0,0,1} at chr1:100,000,000 * * @param perReadAlleleLikelihoodMap for each read, the underlying alleles represented by an aligned read, and corresponding relative likelihood. * @param vc variant context * @return count of A, C, G, T bases */ private int[] getBaseCounts(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc) { final Set<Allele> alleles = new HashSet<>(vc.getAlleles()); return AlignmentUtils.countBasesAtPileupPosition(perReadAlleleLikelihoodMap, alleles, vc.getStart()); } }
pos = addCigarElements(newElements, pos, start, end, elt); break; case S: case I: newElements.add(elt); } else { pos = addCigarElements(newElements, pos, start, end, elt); return AlignmentUtils.consolidateCigar(new Cigar(newElements));
final Cigar swCigar = consolidateCigar(swPairwiseAlignment.getCigar()); final int readStartOnHaplotype = calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1()); final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype; read.setAlignmentStart(readStartOnReference); final Cigar haplotypeToRef = trimCigarByBases(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1(), extendedHaplotypeCigar.getReadLength() - 1); final Cigar readToRefCigarRaw = applyCigarToCigar(swCigar, haplotypeToRef); final Cigar readToRefCigarClean = cleanUpCigar(readToRefCigarRaw); final Cigar readToRefCigar = leftAlignIndel(readToRefCigarClean, refHaplotype.getBases(), originalRead.getReadBases(), swPairwiseAlignment.getAlignmentStart2wrt1(), 0, true);
int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read); if ( numBlocks == 2 ) { Cigar newCigar = AlignmentUtils.leftAlignIndel(unclipCigar(read.getCigar()), reference, read.getReadBases(), read.getAlignmentStart()-leftmostIndex, 0, true); aRead.setCigar(newCigar, false); totalRawMismatchSum += rawMismatchScore; aRead.setMismatchScoreToReference(rawMismatchScore); aRead.setAlignerMismatchScore(AlignmentUtils.mismatchingQualities(aRead.getRead(), reference, startOnRef));
final CigarElement elt23 = secondToThird.getCigarElement(cigar23I); final CigarPairTransform transform = getTransformer(elt12.getOperator(), elt23.getOperator()); return AlignmentUtils.consolidateCigar(new Cigar(newElements));
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG) public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception { final HaplotypeBAMWriter writer = new CalledHaplotypeBAMWriter(new MockDestination()); final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, new Haplotype(reference.getBytes(),true), 1, true); if ( alignedRead != null ) { final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches; Assert.assertTrue(mismatches <= expectedMaxMismatches, "Alignment of read to ref looks broken. Expected at most " + expectedMaxMismatches + " but saw " + mismatches + " for readBases " + new String(read.getReadBases()) + " with cigar " + read.getCigar() + " reference " + reference + " haplotype " + haplotype + " with cigar " + haplotype.getCigar() + " aligned read cigar " + alignedRead.getCigarString() + " @ " + alignedRead.getAlignmentStart()); } } }
readBases = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readBases); // Adjust the read bases based on the Cigar string byte[] readQuals = read.getBaseQualities(); readQuals = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string final int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus); final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;
if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) { cigarToAlign.add(ce); final Cigar leftAligned = AlignmentUtils.leftAlignSingleIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false); for ( final CigarElement toAdd : leftAligned.getCigarElements() ) { cigarToReturn.add(toAdd); } refIndex += cigarToAlign.getReferenceLength(); final Cigar result = AlignmentUtils.consolidateCigar(cigarToReturn); if( result.getReferenceLength() != cigar.getReferenceLength() ) throw new IllegalStateException("leftAlignCigarSequentially failed to produce a valid CIGAR. Reference lengths differ. Initial cigar " + cigar + " left aligned into " + result);
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { // we can not deal with screwy records if ( read.getReadUnmappedFlag() || read.getCigar().numCigarElements() == 0 ) { emit(read); return 0; } // move existing indels (for 1 indel reads only) to leftmost position within identical sequence int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read); if ( numBlocks == 2 ) { Cigar newCigar = AlignmentUtils.leftAlignIndel(IndelRealigner.unclipCigar(read.getCigar()), ref.getBases(), read.getReadBases(), 0, 0, true); newCigar = IndelRealigner.reclipCigar(newCigar, read); read.setCigar(newCigar); } emit(read); return 1; }
/** * Returns a map with the original read as a key and the realigned read as the value. * <p> * Missing keys or equivalent key and value pairs mean that the read was not realigned. * </p> * @return never {@code null} */ private Map<GATKSAMRecord,GATKSAMRecord> realignReadsToTheirBestHaplotype(final ReadLikelihoods<Haplotype> originalReadLikelihoods, final Haplotype refHaplotype, final GenomeLoc paddedReferenceLoc) { final Collection<ReadLikelihoods<Haplotype>.BestAllele> bestAlleles = originalReadLikelihoods.bestAlleles(); final Map<GATKSAMRecord,GATKSAMRecord> result = new HashMap<>(bestAlleles.size()); for (final ReadLikelihoods<Haplotype>.BestAllele bestAllele : bestAlleles) { final GATKSAMRecord originalRead = bestAllele.read; final Haplotype bestHaplotype = bestAllele.allele; final boolean isInformative = bestAllele.isInformative(); final GATKSAMRecord realignedRead = AlignmentUtils.createReadAlignedToRef(originalRead, bestHaplotype, refHaplotype, paddedReferenceLoc.getStart(), isInformative); result.put(originalRead,realignedRead); } return result; }
public static long mismatchingQualities(GATKSAMRecord r, byte[] refSeq, int refIndex) { return getMismatchCount(r, refSeq, refIndex).mismatchQualities; }
@Test(enabled = !DEBUG, dataProvider = "LeftAlignIndelDataProvider") public void testLeftAlignIndelData(final Cigar originalCigar, final Cigar expectedCigar, final byte[] reference, final byte[] read, final int repeatLength) { final Cigar actualCigar = AlignmentUtils.leftAlignIndel(originalCigar, reference, read, 0, 0, true); Assert.assertTrue(expectedCigar.equals(actualCigar), "Wrong left alignment detected for cigar " + originalCigar.toString() + " to " + actualCigar.toString() + " but expected " + expectedCigar.toString() + " with repeat length " + repeatLength); }
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) { return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isDeletion(), alignmentStart, refLocus ); }
@Test(enabled = !DEBUG, dataProvider = "SoftClipsDataProvider") public void testSoftClipsData(final byte[] qualsOfSoftClipsOnLeft, final int middleSize, final String middleOp, final byte[] qualOfSoftClipsOnRight, final int qualThreshold, final int numExpected) { final int readLength = (middleOp.equals("D") ? 0 : middleSize) + qualOfSoftClipsOnRight.length + qualsOfSoftClipsOnLeft.length; final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength); final byte[] bases = Utils.dupBytes((byte) 'A', readLength); final byte[] matchBytes = middleOp.equals("D") ? new byte[]{} : Utils.dupBytes((byte)30, middleSize); final byte[] quals = ArrayUtils.addAll(ArrayUtils.addAll(qualsOfSoftClipsOnLeft, matchBytes), qualOfSoftClipsOnRight); // set the read's bases and quals read.setReadBases(bases); read.setBaseQualities(quals); final StringBuilder cigar = new StringBuilder(); if (qualsOfSoftClipsOnLeft.length > 0 ) cigar.append(qualsOfSoftClipsOnLeft.length + "S"); if (middleSize > 0 ) cigar.append(middleSize + middleOp); if (qualOfSoftClipsOnRight.length > 0 ) cigar.append(qualOfSoftClipsOnRight.length + "S"); read.setCigarString(cigar.toString()); final int actual = AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) qualThreshold); Assert.assertEquals(actual, numExpected, "Wrong number of soft clips detected for read " + read.getSAMString()); }
@Test(dataProvider = "ReadOffsetFromCigarData", enabled = !DEBUG) public void testReadOffsetFromCigar(final String cigarString, final int startOnCigar, final int expectedOffset) { final Cigar cigar = TextCigarCodec.decode(cigarString); final int actualOffset = AlignmentUtils.calcFirstBaseMatchingReferenceInCigar(cigar, startOnCigar); Assert.assertEquals(actualOffset, expectedOffset); }
@Test(dataProvider = "ApplyCigarToCigarData", enabled = !DEBUG) public void testApplyCigarToCigar(final String firstToSecondString, final String secondToThirdString, final String expectedCigarString) { final Cigar firstToSecond = TextCigarCodec.decode(firstToSecondString); final Cigar secondToThird = TextCigarCodec.decode(secondToThirdString); final Cigar expectedCigar = TextCigarCodec.decode(expectedCigarString); final Cigar actualCigar = AlignmentUtils.applyCigarToCigar(firstToSecond, secondToThird); Assert.assertEquals(actualCigar, expectedCigar); }
@Test(dataProvider = "AddCigarElementsData", enabled = !DEBUG) public void testAddCigarElements(final String cigarString, final int pos, final int start, final int end, final String expectedCigarString) { final Cigar cigar = TextCigarCodec.decode(cigarString); final CigarElement elt = cigar.getCigarElement(0); final Cigar expectedCigar = TextCigarCodec.decode(expectedCigarString); final List<CigarElement> elts = new LinkedList<CigarElement>(); final int actualEndPos = AlignmentUtils.addCigarElements(elts, pos, start, end, elt); Assert.assertEquals(actualEndPos, pos + elt.getLength()); Assert.assertEquals(AlignmentUtils.consolidateCigar(new Cigar(elts)), expectedCigar); }
byte[] readBases = read.getReadBases(); readBases = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readBases); // Adjust the read bases based on the Cigar string byte[] readQuals = read.getBaseQualities(); readQuals = AlignmentUtils.readToAlignmentByteArray(p.getRead().getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, read.getAlignmentStart(), locus); final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2;