@Override protected boolean isUsableRead(final GATKSAMRecord read, final int refLoc) { return super.isUsableRead(read, refLoc) && read.getSoftEnd() >= refLoc; }
public static List<GATKSAMRecord> mergeOverlappingPairedFragments( final List<GATKSAMRecord> overlappingPair ) { if( overlappingPair.size() != 2 ) { throw new ReviewedGATKException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); } final GATKSAMRecord firstRead = overlappingPair.get(0); final GATKSAMRecord secondRead = overlappingPair.get(1); final GATKSAMRecord merged; if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { merged = mergeOverlappingPairedFragments(secondRead, firstRead); } else { merged = mergeOverlappingPairedFragments(firstRead, secondRead); } return merged == null ? overlappingPair : Collections.singletonList(merged); }
/** * Should we clip a upstream portion of a read because it spans off the end of a haplotype? * * @param read the read in question * @param refWindowStart the start of the reference window * @return true if the read needs to be clipped, false otherwise */ protected static boolean mustClipUpstream(final GATKSAMRecord read, final int refWindowStart) { return ( !read.isEmpty() && read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart ); }
public void add(GATKSAMRecord read) { final int readStart = read.getSoftStart(); final int readStop = read.getSoftEnd(); if ( loc == null ) loc = parser.createGenomeLoc(read.getReferenceName(), readStart, Math.max(readStop, readStart)); // in case it's all an insertion else if ( readStop > loc.getStop() ) loc = parser.createGenomeLoc(loc.getContig(), loc.getStart(), readStop); reads.add(read); }
public void setRead(final GATKSAMRecord read) { if ( !read.isEmpty() ) { this.read = read; if ( ! read.getReadUnmappedFlag() ) loc = genomeLocParser.createGenomeLoc(read.getReferenceName(), read.getSoftStart(), read.getSoftEnd()); } } }
protected static boolean[] calculateKnownSites( final GATKSAMRecord read, final List<Feature> features ) { final int readLength = read.getReadBases().length; final boolean[] knownSites = new boolean[readLength]; Arrays.fill(knownSites, false); for( final Feature f : features ) { if ((f.getStart() < read.getSoftStart() && f.getEnd() < read.getSoftStart()) || (f.getStart() > read.getSoftEnd() && f.getEnd() > read.getSoftEnd())) { // feature is outside clipping window for the read, ignore continue; } int featureStartOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getStart(), ReadUtils.ClippingTail.LEFT_TAIL, true); // BUGBUG: should I use LEFT_TAIL here? if( featureStartOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureStartOnRead = 0; } int featureEndOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getEnd(), ReadUtils.ClippingTail.LEFT_TAIL, true); if( featureEndOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureEndOnRead = readLength; } if( featureStartOnRead > readLength ) { featureStartOnRead = featureEndOnRead = readLength; } Arrays.fill(knownSites, Math.max(0, featureStartOnRead), Math.min(readLength, featureEndOnRead + 1), true); } return knownSites; }
@Test(dataProvider = "ClipUpstreamProvider", enabled = true) public void clipUpstreamTest(final int readStart, final int readLength, final int delLength) { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead", 0, readStart, readLength); if ( delLength == 0 ) read.setCigarString(readLength + "M"); else read.setCigarString((readLength / 2) + "M" + delLength + "D" + (readLength / 2) + "M"); final boolean result = PairHMMIndelErrorModel.mustClipUpstream(read, refWindowStart); Assert.assertEquals(result, read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart); }
/** * Tests the GenomeLoc variable in the ReadBin after adding arbitrary reads * * @param cigarString the read's cigar string * @param alignmentStart the read's alignment start */ @Test(enabled = true, dataProvider = "reads") public void testAddingReads(String cigarString, int alignmentStart) { final GATKSAMRecord read = createReadAndAddToBin(cigarString, alignmentStart); final GenomeLoc readLoc = parser.createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getSoftStart(), Math.max(read.getSoftStart(), read.getSoftEnd())); Assert.assertEquals(readBin.getLocation(), readLoc); readBin.clear(); }
final long readEnd = read.getSoftEnd(); final int numEndSoftClippedBases = softClips ? read.getSoftEnd()- read.getAlignmentEnd() : 0 ; final byte [] unclippedReadBases = read.getReadBases(); final byte [] unclippedReadQuals = read.getBaseQualities();
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesRightTail() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } } } } }
int sStop = read.getSoftEnd(); int uStart = read.getUnclippedStart(); int uStop = read.getUnclippedEnd();
final GATKSAMRecord secondRead = ReadClipper.hardClipAdaptorSequence(ReadClipper.revertSoftClippedBases(unclippedSecondRead)); if( !(secondRead.getSoftStart() <= firstRead.getSoftEnd() && secondRead.getSoftStart() >= firstRead.getSoftStart() && secondRead.getSoftEnd() >= firstRead.getSoftEnd()) ) { return null; // can't merge them, yet: AAAAAAAAAAA-BBBBBBBBBBB-AAAAAAAAAAAAAA, B is contained entirely inside A
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinates() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int start = read.getSoftStart(); int stop = read.getSoftEnd(); for (int i = start; i <= stop; i++) { GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i); if (!clipLeft.isEmpty()) { Assert.assertTrue(clipLeft.getAlignmentStart() >= Math.min(read.getAlignmentEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString())); assertUnclippedLimits(read, clipLeft); } GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= Math.max(read.getAlignmentStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } } } }
int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd(); if (nTailingSoftClips > 0) { for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength() ; i++) {