public static List<GATKSAMRecord> hardClipToRegion( final List<GATKSAMRecord> reads, final int refStart, final int refStop ) { final List<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>( reads.size() ); for( final GATKSAMRecord read : reads ) { final GATKSAMRecord clippedRead = hardClipToRegion( read, refStart, refStop ); if( !clippedRead.isEmpty() ) { returnList.add( clippedRead ); } } return returnList; }
/** * Should we clip a upstream portion of a read because it spans off the end of a haplotype? * * @param read the read in question * @param refWindowStart the start of the reference window * @return true if the read needs to be clipped, false otherwise */ protected static boolean mustClipUpstream(final GATKSAMRecord read, final int refWindowStart) { return ( !read.isEmpty() && read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart ); }
/** * Should we clip a downstream portion of a read because it spans off the end of a haplotype? * * @param read the read in question * @param refWindowStop the end of the reference window * @return true if the read needs to be clipped, false otherwise */ protected static boolean mustClipDownstream(final GATKSAMRecord read, final int refWindowStop) { return ( !read.isEmpty() && read.getSoftStart() < refWindowStop && read.getSoftStart() + read.getReadLength() - 1 > refWindowStop ); }
/** * Turns soft clipped bases into matches * @return a new read with every soft clip turned into a match */ private GATKSAMRecord revertSoftClippedBases() { if (read.isEmpty()) return read; this.addOp(new ClippingOp(0, 0)); return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES); }
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) { if (!read.isEmpty()) { byte[] quals = read.getBaseQualities(); for (int i = 0; i < quals.length; i++) Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString())); } }
/** * Asserts that the two reads have the same bases, qualities and cigar strings * * @param actual the calculated read * @param expected the expected read */ public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) { // If they're both not empty, test their contents if(!actual.isEmpty() && !expected.isEmpty()) { Assert.assertEquals(actual.getReadBases(), expected.getReadBases()); Assert.assertEquals(actual.getBaseQualities(), expected.getBaseQualities()); Assert.assertEquals(actual.getCigarString(), expected.getCigarString()); } // Otherwise test if they're both empty else Assert.assertEquals(actual.isEmpty(), expected.isEmpty()); } }
public void setRead(final GATKSAMRecord read) { if ( !read.isEmpty() ) { this.read = read; if ( ! read.getReadUnmappedFlag() ) loc = genomeLocParser.createGenomeLoc(read.getReferenceName(), read.getSoftStart(), read.getSoftEnd()); } } }
/** * Clips a read according to ops and the chosen algorithm. * * @param algorithm What mode of clipping do you want to apply for the stacked operations. * @return the read with the clipping applied. */ public GATKSAMRecord clipRead(ClippingRepresentation algorithm) { if (ops == null) return getRead(); GATKSAMRecord clippedRead = read; for (ClippingOp op : getOps()) { final int readLength = clippedRead.getReadLength(); //check if the clipped read can still be clipped in the range requested if (op.start < readLength) { ClippingOp fixedOperation = op; if (op.stop >= readLength) fixedOperation = new ClippingOp(op.start, readLength - 1); clippedRead = fixedOperation.apply(algorithm, clippedRead); } } wasClipped = true; ops.clear(); if ( clippedRead.isEmpty() ) return GATKSAMRecord.emptyRead(clippedRead); return clippedRead; }
/** * Hard clips a read using read coordinates. * * @param start the first base to clip (inclusive) * @param stop the last base to clip (inclusive) * @return a new read, without the clipped bases */ @Requires({"start >= 0 && stop <= read.getReadLength() - 1", // start and stop have to be within the read "start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) { if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1)) return GATKSAMRecord.emptyRead(read); this.addOp(new ClippingOp(start, stop)); return clipRead(ClippingRepresentation.HARDCLIP_BASES); } public static GATKSAMRecord hardClipByReadCoordinates(GATKSAMRecord read, int start, int stop) {
/** * Hard clips both tails of a read. * Left tail goes from the beginning to the 'left' coordinate (inclusive) * Right tail goes from the 'right' coordinate (inclusive) until the end of the read * * @param left the coordinate of the last base to be clipped in the left tail (inclusive) * @param right the coordinate of the first base to be clipped in the right tail (inclusive) * @return a new read, without the clipped bases */ @Requires({"left <= right", // tails cannot overlap "left >= read.getAlignmentStart()", // coordinate has to be within the mapped read "right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) { if (read.isEmpty() || left == right) return GATKSAMRecord.emptyRead(read); GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1); // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions // make the left cut index no longer part of the read. In that case, clip the read entirely. if (left > leftTailRead.getAlignmentEnd()) return GATKSAMRecord.emptyRead(read); ReadClipper clipper = new ReadClipper(leftTailRead); return clipper.hardClipByReferenceCoordinatesLeftTail(left); } public static GATKSAMRecord hardClipBothEndsByReferenceCoordinates(GATKSAMRecord read, int left, int right) {
if (read.isEmpty()) return read;
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesLeftTail() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i); if (!clipLeft.isEmpty()) { Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } } } } }
/** * Hard clips any leading insertions in the read. Only looks at the beginning of the read, not the end. * * @return a new read without leading insertions */ private GATKSAMRecord hardClipLeadingInsertions() { if (read.isEmpty()) return read; for(CigarElement cigarElement : read.getCigar().getCigarElements()) { if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.INSERTION) break; else if (cigarElement.getOperator() == CigarOperator.INSERTION) this.addOp(new ClippingOp(0, cigarElement.getLength() - 1)); } return clipRead(ClippingRepresentation.HARDCLIP_BASES); } public static GATKSAMRecord hardClipLeadingInsertions(GATKSAMRecord read) {
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinates() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int start = read.getSoftStart(); int stop = read.getSoftEnd(); for (int i = start; i <= stop; i++) { GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i); if (!clipLeft.isEmpty()) { Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } } } }
@Test(enabled = false) public void testHardClipLeadingInsertions() { for (Cigar cigar : cigarList) { if (startsWithInsertion(cigar)) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); GATKSAMRecord clippedRead = ReadClipper.hardClipLeadingInsertions(read); assertUnclippedLimits(read, clippedRead); // Make sure limits haven't changed int expectedLength = read.getReadLength() - leadingCigarElementLength(read.getCigar(), CigarOperator.INSERTION); if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar())) expectedLength -= leadingCigarElementLength(CigarUtils.invertCigar(read.getCigar()), CigarOperator.INSERTION); if (!clippedRead.isEmpty()) { Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone } else Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped } } }
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesRightTail() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } } } } }
if( !clippedRead.isEmpty() && clippedRead.getCigar().getReadLength() > 0 ) { clippedRead = ReadClipper.hardClipToRegion(clippedRead, activeRegion.getExtendedLoc().getStart(), activeRegion.getExtendedLoc().getStop()); if( activeRegion.readOverlapsRegion(clippedRead) && clippedRead.getReadLength() > 0 ) {
if (read.isEmpty() || nLeadingSoftClips > read.getReadLength()) return GATKSAMRecord.emptyRead(read);
if (read.isEmpty()) return read;
if( read.isEmpty() ) { return 0L; } // the whole read was inside the adaptor so skip it