public boolean finalizeUpdate() { // if we haven't made any changes, don't do anything if ( newCigar == null ) return false; if ( newStart == -1 ) newStart = read.getAlignmentStart(); else if ( Math.abs(newStart - read.getAlignmentStart()) > MAX_POS_MOVE_ALLOWED ) { logger.debug(String.format("Attempting to realign read %s at %d more than %d bases to %d.", read.getReadName(), read.getAlignmentStart(), MAX_POS_MOVE_ALLOWED, newStart)); return false; } // store the old CIGAR and start in case we need to back out final Cigar oldCigar = read.getCigar(); final int oldStart = read.getAlignmentStart(); // try updating the read with the new CIGAR and start read.setCigar(newCigar); read.setAlignmentStart(newStart); // back out if necessary if ( realignmentProducesBadAlignment(read) ) { read.setCigar(oldCigar); read.setAlignmentStart(oldStart); return false; } // annotate the record with the original cigar and start (if it changed) if ( !NO_ORIGINAL_ALIGNMENT_TAGS ) { read.setAttribute(ORIGINAL_CIGAR_TAG, oldCigar.toString()); if ( newStart != oldStart ) read.setAttribute(ORIGINAL_POSITION_TAG, oldStart); } return true; }
/** * Loads the read that is going to be evaluated in following calls to {@link #calculateLocalLikelihoods}. * * @param read the target read. * @throws NullPointerException if {@code read} is null. */ @Override public void loadRead(final GATKSAMRecord read) { loadRead(read.getReadBases(),read.getBaseQualities(),read.getBaseInsertionQualities(),read.getBaseDeletionQualities(),read.getMappingQuality()); }
private Set<GATKSAMRecord> filterNonPassingReads( final ActiveRegion activeRegion ) { final Set<GATKSAMRecord> readsToRemove = new LinkedHashSet<>(); for( final GATKSAMRecord rec : activeRegion.getReads() ) { if( rec.getReadLength() < READ_LENGTH_FILTER_THRESHOLD || rec.getMappingQuality() < READ_QUALITY_FILTER_THRESHOLD || (!isBadMateFilterDisabled && BadMateFilter.hasBadMate(rec)) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); } } activeRegion.removeAll( readsToRemove ); return readsToRemove; }
/** * Does read start at the same position as described by currentContextIndex and currentAlignmentStart? * * @param read the read we want to test * @param currentContigIndex the contig index (from the read's getReferenceIndex) of the reads in this state manager * @param currentAlignmentStart the alignment start of the of the left-most position on the * genome of the reads in this read state manager * @return true if read has contig index and start equal to the current ones */ private boolean readStartsAtCurrentPosition(final GATKSAMRecord read, final int currentContigIndex, final int currentAlignmentStart) { return read.getAlignmentStart() == currentAlignmentStart && read.getReferenceIndex() == currentContigIndex; }
/** * Is a base inside a read? * * @param read the read to evaluate * @param referenceCoordinate the reference coordinate of the base to test * @return true if it is inside the read, false otherwise. */ public static boolean isInsideRead(final GATKSAMRecord read, final int referenceCoordinate) { return referenceCoordinate >= read.getAlignmentStart() && referenceCoordinate <= read.getAlignmentEnd(); }
/** * Should we clip a upstream portion of a read because it spans off the end of a haplotype? * * @param read the read in question * @param refWindowStart the start of the reference window * @return true if the read needs to be clipped, false otherwise */ protected static boolean mustClipUpstream(final GATKSAMRecord read, final int refWindowStart) { return ( !read.isEmpty() && read.getSoftStart() < refWindowStart && read.getSoftEnd() > refWindowStart ); }
/** * Creates a hard-clipped view on a existing read record. * @param read the underlying unclipped read. * @param start inclusive first position in {@code read} included in the clipped view. * @param end inclusive last position in {@code read} included in the clipped view. */ public ClippedGATKSAMRecord(final GATKSAMRecord read, int start, int end) { super(read.getHeader()); this.setReferenceIndex(read.getReferenceIndex()); this.setAlignmentStart(read.getAlignmentStart() + start); this.setMappingQuality(100); // setting read indexing bin below this.setFlags(read.getFlags()); this.setMateReferenceIndex(read.getMateReferenceIndex()); this.setMateAlignmentStart(read.getMateAlignmentStart()); this.setInferredInsertSize(read.getInferredInsertSize()); this.setReadBases(Arrays.copyOfRange(read.getReadBases(), start, end)); this.setBaseQualities(Arrays.copyOfRange(read.getBaseQualities(),start,end)); this.setReadName(read.getReadName()); insertionQuals = Arrays.copyOfRange(read.getBaseInsertionQualities(),start,end); deletionQuals = Arrays.copyOfRange(read.getBaseDeletionQualities(),start,end); // Set these to null in order to mark them as being candidates for lazy initialization. // If this is not done, they will have non-null defaults. super.setReadName(null); super.setCigarString(null); super.setReadBases(null); super.setBaseQualities(null); // Do this after the above because setCigarString will clear it. GATKBin.setReadIndexingBin(this, -1); }
/** * Write out a representation of this haplotype as a read * * @param haplotype a haplotype to write out. Cannot be null * @param paddedRefLoc the reference location. Cannot be null * @param isAmongBestHaplotypes true if among the best haplotypes, false if it was just one possible but not so good */ private void writeHaplotype(final Haplotype haplotype, final GenomeLoc paddedRefLoc, final boolean isAmongBestHaplotypes) { final GATKSAMRecord record = new GATKSAMRecord(output.getHeader()); record.setReadBases(haplotype.getBases()); record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef()); record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length)); record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar())); record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0); record.setReadName("HC" + uniqueNameCounter++); record.setAttribute(AlignmentUtils.HAPLOTYPE_TAG,haplotype.hashCode()); record.setReadUnmappedFlag(false); record.setReferenceIndex(paddedRefLoc.getContigIndex()); record.setAttribute(SAMTag.RG.toString(), READ_GROUP_ID); record.setFlags(16); output.add(record); }
private GATKSAMRecord createReadOffContig(final SAMFileHeader header, final boolean negStrand, final int pre, final int post) { final int contigLen = header.getSequence(0).getSequenceLength(); final int readLen = pre + contigLen + post; final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, readLen); read.setAlignmentStart(1); read.setCigar(TextCigarCodec.decode(pre + "S" + contigLen + "M" + post + "S")); read.setBaseQualities(Utils.dupBytes((byte) 30, readLen)); read.setReadBases(Utils.dupBytes((byte)'A', readLen)); read.setMappingQuality(60); read.setMateAlignmentStart(1); read.setProperPairFlag(true); read.setReadPairedFlag(true); read.setInferredInsertSize(30); read.setReadNegativeStrandFlag(negStrand); read.setMateNegativeStrandFlag(! negStrand); read.setReadGroup(new GATKSAMReadGroupRecord("foo")); return read; }
@Test(enabled = !DEBUG) public void testHardClipByReferenceCoordinatesRightTail() { for (Cigar cigar : cigarList) { GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); int alnStart = read.getAlignmentStart(); int alnEnd = read.getAlignmentEnd(); if (read.getSoftEnd() == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side for (int i = alnStart; i <= alnEnd; i++) { GATKSAMRecord clipRight = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, i); if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString())); assertUnclippedLimits(read, clipRight); } } } } }
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); header.setSequenceDictionary(dictionary); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(dictionary.getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); record.setReadString(new String(new char[len]).replace("\0", "A")); record.setBaseQualities(new byte[len]); record.setReadGroup(new GATKSAMReadGroupRecord(header.getReadGroup("test"))); return record; }
protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) { GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(dictionary.getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); record.setCigarString("1M"); record.setReadString("A"); record.setBaseQualityString("A"); record.setReadGroup(readGroup); return record; }
/** * Build a SAM record featuring the absolute minimum required dataset. * * @param contig Contig to populate. * @param alignmentStart start of alignment * @param alignmentEnd end of alignment * * @return New SAM Record */ protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary()); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); record.setReadBases(new byte[len]); record.setBaseQualities(new byte[len]); return record; }
@Test public void testUnmappedReadsDoNotFail() { // create an unmapped read final GATKSAMRecord read = new GATKSAMRecord(ArtificialSAMUtils.createArtificialSamHeader()); read.setReadName("foo"); read.setReferenceName("*"); read.setAlignmentStart(100); read.setCigarString("*"); read.setReadUnmappedFlag(true); // try to add it to the manager final OverhangFixingManager manager = new OverhangFixingManager(null, null, null, 100, 1, 30, false); manager.addRead(read); // we just want to make sure that the following call does not fail Assert.assertTrue(true); } }
@Test(enabled = true, dataProvider = "RevertSoftClipsBeforeContig") public void testRevertSoftClippedBasesBeforeStartOfContig(final int softStart, final int alignmentStart) { final int nMatches = 10; final int nSoft = -1 * (softStart - alignmentStart); final String cigar = nSoft + "S" + nMatches + "M"; final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); read.setAlignmentStart(alignmentStart); Assert.assertEquals(read.getSoftStart(), softStart); Assert.assertEquals(read.getAlignmentStart(), alignmentStart); Assert.assertEquals(read.getCigarString(), cigar); final GATKSAMRecord reverted = ReadClipper.revertSoftClippedBases(read); final int expectedAlignmentStart = 1; final String expectedCigar = (1 - softStart) + "H" + read.getAlignmentEnd() + "M"; Assert.assertEquals(reverted.getSoftStart(), expectedAlignmentStart); Assert.assertEquals(reverted.getAlignmentStart(), expectedAlignmentStart); Assert.assertEquals(reverted.getCigarString(), expectedCigar); }
private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) { GATKSAMRecord unclipped = (GATKSAMRecord) read.clone(); Cigar unclippedCigar = new Cigar(); int matchesCount = 0; for (CigarElement element : read.getCigar().getCigarElements()) { if (element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH) matchesCount += element.getLength(); else if (matchesCount > 0) { unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH)); matchesCount = 0; unclippedCigar.add(element); } else unclippedCigar.add(element); } if (matchesCount > 0) unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH)); unclipped.setCigar(unclippedCigar); final int newStart = read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar); unclipped.setAlignmentStart(newStart); if ( newStart <= 0 ) { // if the start of the unclipped read occurs before the contig, // we must hard clip away the bases since we cannot represent reads with // negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned) return hardClip(unclipped, 0, - newStart); } else { return unclipped; } }
@Test(enabled = true, expectedExceptions={UserException.class}) public void testMoreThanMaxCycleFails() { int readLength = RAC.MAXIMUM_CYCLE_VALUE + 1; GATKSAMRecord read = ReadUtils.createRandomRead(readLength); read.setReadPairedFlag(true); read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID")); read.getReadGroup().setPlatform("illumina"); ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1); covariate.recordValues(read, readCovariates); }
@Test( ) public void testCreationFromSAMRecordUnmappedButOnGenome() { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5); read.setReadUnmappedFlag(true); read.setCigarString("*"); final GenomeLoc loc = genomeLocParser.createGenomeLoc(read); Assert.assertEquals(loc.getContig(), read.getReferenceName()); Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex()); Assert.assertEquals(loc.getStart(), read.getAlignmentStart()); Assert.assertEquals(loc.getStop(), read.getAlignmentStart()); }
public void setRead(final GATKSAMRecord read) { if ( !read.isEmpty() ) { this.read = read; if ( ! read.getReadUnmappedFlag() ) loc = genomeLocParser.createGenomeLoc(read.getReferenceName(), read.getSoftStart(), read.getSoftEnd()); } } }
protected static boolean[] calculateKnownSites( final GATKSAMRecord read, final List<Feature> features ) { final int readLength = read.getReadBases().length; final boolean[] knownSites = new boolean[readLength]; Arrays.fill(knownSites, false); for( final Feature f : features ) { if ((f.getStart() < read.getSoftStart() && f.getEnd() < read.getSoftStart()) || (f.getStart() > read.getSoftEnd() && f.getEnd() > read.getSoftEnd())) { // feature is outside clipping window for the read, ignore continue; } int featureStartOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getStart(), ReadUtils.ClippingTail.LEFT_TAIL, true); // BUGBUG: should I use LEFT_TAIL here? if( featureStartOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureStartOnRead = 0; } int featureEndOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getEnd(), ReadUtils.ClippingTail.LEFT_TAIL, true); if( featureEndOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureEndOnRead = readLength; } if( featureStartOnRead > readLength ) { featureStartOnRead = featureEndOnRead = readLength; } Arrays.fill(knownSites, Math.max(0, featureStartOnRead), Math.min(readLength, featureEndOnRead + 1), true); } return knownSites; }