/** * Determines whether the read aligns off the end of the contig. * Pulled out to make it testable. * * @param read the read to check * @return true if it aligns off the end */ protected static boolean realignmentProducesBadAlignment(final GATKSAMRecord read, final int contigLength) { return read.getAlignmentEnd() > contigLength; }
/** * Is a base inside a read? * * @param read the read to evaluate * @param referenceCoordinate the reference coordinate of the base to test * @return true if it is inside the read, false otherwise. */ public static boolean isInsideRead(final GATKSAMRecord read, final int referenceCoordinate) { return referenceCoordinate >= read.getAlignmentStart() && referenceCoordinate <= read.getAlignmentEnd(); }
private boolean useSoftClippedBases(GATKSAMRecord read, long eventStartPos, int eventLength) { return !((read.getAlignmentStart() >= eventStartPos-eventLength && read.getAlignmentStart() <= eventStartPos+1) || (read.getAlignmentEnd() >= eventStartPos && read.getAlignmentEnd() <= eventStartPos + eventLength)); }
/** * Hard clip the read to the variable region (from refStart to refStop) * * @param read the read to be clipped * @param refStart the beginning of the variant region (inclusive) * @param refStop the end of the variant region (inclusive) * @return the read hard clipped to the variant region */ public static GATKSAMRecord hardClipToRegion( final GATKSAMRecord read, final int refStart, final int refStop ) { final int start = read.getAlignmentStart(); final int stop = read.getAlignmentEnd(); return hardClipToRegion(read, refStart, refStop,start,stop); }
/** * Calculates the reference coordinate for the end of the read taking into account soft clips but not hard clips. * * Note: getUnclippedEnd() adds soft and hard clips, this function only adds soft clips. * * @return the unclipped end of the read taking soft clips (but not hard clips) into account */ public int getSoftEnd() { if ( softEnd == UNINITIALIZED ) { boolean foundAlignedBase = false; softEnd = getAlignmentEnd(); final List<CigarElement> cigs = getCigar().getCigarElements(); for (int i = cigs.size() - 1; i >= 0; --i) { final CigarElement cig = cigs.get(i); final CigarOperator op = cig.getOperator(); if (op == CigarOperator.SOFT_CLIP) // assumes the soft clip that we found is at the end of the aligned read softEnd += cig.getLength(); else if (op != CigarOperator.HARD_CLIP) { foundAlignedBase = true; break; } } if( !foundAlignedBase ) { // for example 64H14S, the soft end is actually the same as the alignment end softEnd = getAlignmentEnd(); } } return softEnd; }
if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) { pileupElements.add(LocusIteratorByState.createPileupForReadAndOffset(right, pos - rightStart));
read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(), read.getCigarString(), readLikelihoods[i][j]);
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesRightTail() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } } } } }
@Test(enabled = !DEBUG) public void testHardClipBothEndsByReferenceCoordinates() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); int readLength = alnStart - alnEnd; for (int i = 0; i < readLength / 2; i++) { GATKSAMRecord clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, alnStart + i, alnEnd - i); Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString())); assertUnclippedLimits(read, clippedRead); } } }
private FragmentUtilsTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { super(FragmentUtilsTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative)); List<GATKSAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); GATKSAMRecord left = pair.get(0); GATKSAMRecord right = pair.get(1); for ( int pos = leftStart; pos < rightStart + readLen; pos++) { boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd(); boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd(); if ( posCoveredByLeft || posCoveredByRight ) { List<GATKSAMRecord> reads = new ArrayList<>(); List<Integer> offsets = new ArrayList<>(); if ( posCoveredByLeft ) { reads.add(left); offsets.add(pos - left.getAlignmentStart()); } if ( posCoveredByRight ) { reads.add(right); offsets.add(pos - right.getAlignmentStart()); } boolean shouldBeFragment = posCoveredByLeft && posCoveredByRight; ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets); TestState testState = new TestState(shouldBeFragment ? 0 : 1, shouldBeFragment ? 1 : 0, pileup, null); statesForPileup.add(testState); } TestState testState = left.getAlignmentEnd() >= right.getAlignmentStart() ? new TestState(0, 1, null, pair) : new TestState(2, 0, null, pair); statesForReads.add(testState); } } }
@Test(enabled = true, dataProvider = "RevertSoftClipsBeforeContig") public void testRevertSoftClippedBasesBeforeStartOfContig(final int softStart, final int alignmentStart) { final int nMatches = 10; final int nSoft = -1 * (softStart - alignmentStart); final String cigar = nSoft + "S" + nMatches + "M"; final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); read.setAlignmentStart(alignmentStart); Assert.assertEquals(read.getSoftStart(), softStart); Assert.assertEquals(read.getAlignmentStart(), alignmentStart); Assert.assertEquals(read.getCigarString(), cigar); final GATKSAMRecord reverted = ReadClipper.revertSoftClippedBases(read); final int expectedAlignmentStart = 1; final String expectedCigar = (1 - softStart) + "H" + read.getAlignmentEnd() + "M"; Assert.assertEquals(reverted.getSoftStart(), expectedAlignmentStart); Assert.assertEquals(reverted.getAlignmentStart(), expectedAlignmentStart); Assert.assertEquals(reverted.getCigarString(), expectedCigar); }
@Test( ) public void testCreationFromSAMRecord() { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5); final GenomeLoc loc = genomeLocParser.createGenomeLoc(read); Assert.assertEquals(loc.getContig(), read.getReferenceName()); Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex()); Assert.assertEquals(loc.getStart(), read.getAlignmentStart()); Assert.assertEquals(loc.getStop(), read.getAlignmentEnd()); }
protected static int[] calculateIsSNP( final GATKSAMRecord read, final ReferenceContext ref, final GATKSAMRecord originalRead ) { final byte[] readBases = read.getReadBases(); final byte[] refBases = Arrays.copyOfRange(ref.getBases(), read.getAlignmentStart() - originalRead.getAlignmentStart(), ref.getBases().length + read.getAlignmentEnd() - originalRead.getAlignmentEnd()); final int[] snp = new int[readBases.length]; int readPos = 0;
@Test(enabled = !DEBUG) public void testRevertSoftClippedBases() { for (Cigar cigar : cigarList) { final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP); final int tailSoftClips = leadingCigarElementLength(CigarUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP); final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); final GATKSAMRecord unclipped = ReadClipper.revertSoftClippedBases(read); assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed if (leadingSoftClips > 0 || tailSoftClips > 0) { final int expectedStart = read.getAlignmentStart() - leadingSoftClips; final int expectedEnd = read.getAlignmentEnd() + tailSoftClips; Assert.assertEquals(unclipped.getAlignmentStart(), expectedStart); Assert.assertEquals(unclipped.getAlignmentEnd(), expectedEnd); } else Assert.assertEquals(read.getCigarString(), unclipped.getCigarString()); } }
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinates() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int start = read.getSoftStart(); int stop = read.getSoftEnd(); for (int i = start; i <= stop; i++) { GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i); if (!clipLeft.isEmpty()) { Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } } } }
/** * Hard clips both tails of a read. * Left tail goes from the beginning to the 'left' coordinate (inclusive) * Right tail goes from the 'right' coordinate (inclusive) until the end of the read * * @param left the coordinate of the last base to be clipped in the left tail (inclusive) * @param right the coordinate of the first base to be clipped in the right tail (inclusive) * @return a new read, without the clipped bases */ @Requires({"left <= right", // tails cannot overlap "left >= read.getAlignmentStart()", // coordinate has to be within the mapped read "right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) return GATKSAMRecord.emptyRead(read); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions // make the left cut index no longer part of the read. In that case, clip the read entirely. if (left > leftTailRead.getAlignmentEnd()) return GATKSAMRecord.emptyRead(read); ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); } public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) {
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesLeftTail() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i); if (!clipLeft.isEmpty()) { Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } } } } }
/** * Is the read dead? That is, can it no longer be in any future active region, and therefore can be discarded? * * read: start |--------> stop ------ stop + extension * region: start |-----------------| end * * Since the regions are coming in order, read could potentially be contained in a future interval if * stop + activeRegionExtension >= end. If, on the other hand, stop + extension is < the end * of this region, then we can discard it, since any future region could only include reads * up to end + 1 - extension. * * Note that this function doesn't care about the dead zone. We're assuming that by * actually calling this function with an active region that region is already in the dead zone, * so checking that the read is in the dead zone doesn't make sense. * * @param read the read we're testing * @param activeRegion the current active region * @return true if the read is dead, false other */ @Requires({"read != null", "activeRegion != null"}) private boolean readCannotOccurInAnyMoreActiveRegions(final GATKSAMRecord read, final ActiveRegion activeRegion) { return read.getReferenceIndex() < activeRegion.getLocation().getContigIndex() || ( read.getReferenceIndex() == activeRegion.getLocation().getContigIndex() && read.getAlignmentEnd() + getActiveRegionExtension() < activeRegion.getLocation().getStop() ); }
final int expectedBpToVisit = read.getAlignmentEnd() - read.getAlignmentStart() + 1; Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp");
int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd(); if (nTailingSoftClips > 0) { for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength() ; i++) {