/** * Should we clip a downstream portion of a read because it spans off the end of a haplotype? * * @param read the read in question * @param refWindowStop the end of the reference window * @return true if the read needs to be clipped, false otherwise */ protected static boolean mustClipDownstream(final GATKSAMRecord read, final int refWindowStop) { return ( !read.isEmpty() && read.getSoftStart() < refWindowStop && read.getSoftStart() + read.getReadLength() - 1 > refWindowStop ); }
public static int getReadCoordinateForReferenceCoordinateUpToEndOfRead(GATKSAMRecord read, int refCoord, ClippingTail tail) { final int leftmostSafeVariantPosition = Math.max(read.getSoftStart(), refCoord); return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), leftmostSafeVariantPosition, tail, false); }
public static void adjustQualsOfOverlappingPairedFragments( final List<GATKSAMRecord> overlappingPair ) { if( overlappingPair.size() != 2 ) { throw new ReviewedGATKException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); } final GATKSAMRecord firstRead = overlappingPair.get(0); final GATKSAMRecord secondRead = overlappingPair.get(1); if ( secondRead.getSoftStart() < firstRead.getSoftStart() ) { adjustQualsOfOverlappingPairedFragments(secondRead, firstRead); } else { adjustQualsOfOverlappingPairedFragments(firstRead, secondRead); } }
public static List<GATKSAMRecord> mergeOverlappingPairedFragments( final List<GATKSAMRecord> overlappingPair ) { if( overlappingPair.size() != 2 ) { throw new ReviewedGATKException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); } final GATKSAMRecord firstRead = overlappingPair.get(0); final GATKSAMRecord secondRead = overlappingPair.get(1); final GATKSAMRecord merged; if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { merged = mergeOverlappingPairedFragments(secondRead, firstRead); } else { merged = mergeOverlappingPairedFragments(firstRead, secondRead); } return merged == null ? overlappingPair : Collections.singletonList(merged); }
/** * Should we clip a upstream portion of a read because it spans off the end of a haplotype? * * @param read the read in question * @param refWindowStart the start of the reference window * @return true if the read needs to be clipped, false otherwise */ protected static boolean mustClipUpstream(final GATKSAMRecord read, final int refWindowStart) { return ( !read.isEmpty() && read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart ); }
@Override protected boolean isUsableRead(final GATKSAMRecord read, final int refLoc) { return super.isUsableRead(read, refLoc) && read.getSoftStart() + read.getCigar().getReadLength() > refLoc; } }
/** * The function assumes that read contains the variant allele. * * @param read * @param variantStartPosition the location of the variant in the reference * @return */ protected Pair<OptionalInt, OptionalInt> getVariantPositionInRead(final GATKSAMRecord read, final int variantStartPosition) { final Pair<Integer, Boolean> refPositionAndDeletionFlag = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), variantStartPosition, true); // the +1 is needed there because getReadCoordinateForReferenceCoordinate() returns the number of read bases from the left end to the variant - 1 int numReadBasesFromLeftEndToVariant = refPositionAndDeletionFlag.getFirst() + 1; // we don't take advantage of fallsInsideOrJustBeforeDeletionOrSkippedRegion flag now, but we might want to, so I will leave it here in comments. // boolean fallsInsideOrJustBeforeDeletionOrSkippedRegion = refPositionAndDeletionFlag.getSecond(); if ( numReadBasesFromLeftEndToVariant == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { return new Pair(OptionalInt.empty(), OptionalInt.empty()); } else { int leftOffset = numReadBasesFromLeftEndToVariant - 1; int rightOffset = read.getReadLength() - numReadBasesFromLeftEndToVariant; return new Pair(OptionalInt.of(leftOffset), OptionalInt.of(rightOffset)); } }
public void add(GATKSAMRecord read) { final int readStart = read.getSoftStart(); final int readStop = read.getSoftEnd(); if ( loc == null ) loc = parser.createGenomeLoc(read.getReferenceName(), readStart, Math.max(readStop, readStart)); // in case it's all an insertion else if ( readStop > loc.getStop() ) loc = parser.createGenomeLoc(loc.getContig(), loc.getStart(), readStop); reads.add(read); }
@Test(dataProvider = "ClipDownstreamProvider", enabled = true) public void clipDownstreamTest(final int readStart, final int readLength, final int delLength) { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, readStart, readLength); if ( delLength == 0 ) read.setCigarString(readLength + "M"); else read.setCigarString((readLength / 2) + "M" + delLength + "D" + (readLength / 2) + "M"); final boolean result = PairHMMIndelErrorModel.mustClipDownstream(read, refWindowEnd); Assert.assertEquals(result, read.getSoftStart() < refWindowEnd && read.getSoftStart() + readLength > refWindowEnd); }
@Test(enabled = !DEBUG) public void testRevertEntirelySoftclippedReads() { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar("2H1S3H", 0); GATKSAMRecord clippedRead = ReadClipper.revertSoftClippedBases(read); Assert.assertEquals(clippedRead.getAlignmentStart(), read.getSoftStart()); }
public void setRead(final GATKSAMRecord read) { if ( !read.isEmpty() ) { this.read = read; if ( ! read.getReadUnmappedFlag() ) loc = genomeLocParser.createGenomeLoc(read.getReferenceName(), read.getSoftStart(), read.getSoftEnd()); } } }
/** * Pre-processes the results of getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int) to take care of * two corner cases: * * 1. If clipping the right tail (end of the read) getReadCoordinateForReferenceCoordinate and fall inside * a deletion return the base after the deletion. If clipping the left tail (beginning of the read) it * doesn't matter because it already returns the previous base by default. * * 2. If clipping the left tail (beginning of the read) getReadCoordinateForReferenceCoordinate and the * read starts with an insertion, and you're requesting the first read based coordinate, it will skip * the leading insertion (because it has the same reference coordinate as the following base). * * @param read * @param refCoord * @param tail * @return the read coordinate corresponding to the requested reference coordinate for clipping. */ @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"}) @Ensures({"result >= 0", "result < read.getReadLength()"}) public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) { return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refCoord, tail, false); }
/** * Returns the read coordinate corresponding to the requested reference coordinate. * * WARNING: if the requested reference coordinate happens to fall inside or just before a deletion (or skipped region) in the read, this function * will return the last read base before the deletion (or skipped region). This function returns a * Pair(int readCoord, boolean fallsInsideOrJustBeforeDeletionOrSkippedRegion) so you can choose which readCoordinate to use when faced with * a deletion (or skipped region). * * SUGGESTION: Use getReadCoordinateForReferenceCoordinate(GATKSAMRecord, int, ClippingTail) instead to get a * pre-processed result according to normal clipping needs. Or you can use this function and tailor the * behavior to your needs. * * @param read * @param refCoord the requested reference coordinate * @return the read coordinate corresponding to the requested reference coordinate. (see warning!) */ @Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"}) @Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"}) //TODO since we do not have contracts any more, should we check for the requirements in the method code? public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) { return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refCoord, false); }
@Test(enabled = true, dataProvider = "RevertSoftClipsBeforeContig") public void testRevertSoftClippedBasesBeforeStartOfContig(final int softStart, final int alignmentStart) { final int nMatches = 10; final int nSoft = -1 * (softStart - alignmentStart); final String cigar = nSoft + "S" + nMatches + "M"; final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); read.setAlignmentStart(alignmentStart); Assert.assertEquals(read.getSoftStart(), softStart); Assert.assertEquals(read.getAlignmentStart(), alignmentStart); Assert.assertEquals(read.getCigarString(), cigar); final GATKSAMRecord reverted = ReadClipper.revertSoftClippedBases(read); final int expectedAlignmentStart = 1; final String expectedCigar = (1 - softStart) + "H" + read.getAlignmentEnd() + "M"; Assert.assertEquals(reverted.getSoftStart(), expectedAlignmentStart); Assert.assertEquals(reverted.getAlignmentStart(), expectedAlignmentStart); Assert.assertEquals(reverted.getCigarString(), expectedCigar); }
/** * Tests the GenomeLoc variable in the ReadBin after adding arbitrary reads * * @param cigarString the read's cigar string * @param alignmentStart the read's alignment start */ @Test(enabled = true, dataProvider = "reads") public void testAddingReads(String cigarString, int alignmentStart) { final GATKSAMRecord read = createReadAndAddToBin(cigarString, alignmentStart); final GenomeLoc readLoc = parser.createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getSoftStart(), Math.max(read.getSoftStart(), read.getSoftEnd())); Assert.assertEquals(readBin.getLocation(), readLoc); readBin.clear(); }
@Test(dataProvider = "ClipUpstreamProvider", enabled = true) public void clipUpstreamTest(final int readStart, final int readLength, final int delLength) { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, readStart, readLength); if ( delLength == 0 ) read.setCigarString(readLength + "M"); else read.setCigarString((readLength / 2) + "M" + delLength + "D" + (readLength / 2) + "M"); final boolean result = PairHMMIndelErrorModel.mustClipUpstream(read, refWindowStart); Assert.assertEquals(result, read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart); }
@Override protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) { final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true ); if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) return null; // If the offset inside a deletion, it does not lie on a read. if ( AlignmentUtils.isInsideDeletion(read.getCigar(), offset) ) { return INVALID_ELEMENT_FROM_READ; } int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 ); final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1); return (double)readPos; }
@Override protected Double getElementForRead(final GATKSAMRecord read, final int refLoc) { final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true); if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) return null; // If the offset inside a deletion, it does not lie on a read. if ( AlignmentUtils.isInsideDeletion(read.getCigar(), offset) ) { return INVALID_ELEMENT_FROM_READ; } int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), offset, false, 0, 0); final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1); return (double)readPos; }
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesLeftTail() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); if (read.getSoftStart() == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipLeft = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, i); if (!clipLeft.isEmpty()) { Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } } } } }
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinates() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int start = read.getSoftStart(); int stop = read.getSoftEnd(); for (int i = start; i <= stop; i++) { GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i); if (!clipLeft.isEmpty()) { Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } } } }