public boolean finalizeUpdate() { // if we haven't made any changes, don't do anything if ( newCigar == null ) return false; if ( newStart == -1 ) newStart = read.getAlignmentStart(); else if ( Math.abs(newStart - read.getAlignmentStart()) > MAX_POS_MOVE_ALLOWED ) { logger.debug(String.format("Attempting to realign read %s at %d more than %d bases to %d.", read.getReadName(), read.getAlignmentStart(), MAX_POS_MOVE_ALLOWED, newStart)); return false; } // store the old CIGAR and start in case we need to back out final Cigar oldCigar = read.getCigar(); final int oldStart = read.getAlignmentStart(); // try updating the read with the new CIGAR and start read.setCigar(newCigar); read.setAlignmentStart(newStart); // back out if necessary if ( realignmentProducesBadAlignment(read) ) { read.setCigar(oldCigar); read.setAlignmentStart(oldStart); return false; } // annotate the record with the original cigar and start (if it changed) if ( !NO_ORIGINAL_ALIGNMENT_TAGS ) { read.setAttribute(ORIGINAL_CIGAR_TAG, oldCigar.toString()); if ( newStart != oldStart ) read.setAttribute(ORIGINAL_POSITION_TAG, oldStart); } return true; }
public GATKSAMRecord createReadAndAddToBin(String cigarString, int alignmentStart) { final GATKSAMRecord read = ReadUtils.createRandomRead(readLength); read.setCigarString(cigarString); read.setAlignmentStart(alignmentStart); readBin.add(read); return read; } }
@Test(enabled = true, dataProvider = "RevertSoftClipsBeforeContig") public void testRevertSoftClippedBasesBeforeStartOfContig(final int softStart, final int alignmentStart) { final int nMatches = 10; final int nSoft = -1 * (softStart - alignmentStart); final String cigar = nSoft + "S" + nMatches + "M"; final GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar, 0); read.setAlignmentStart(alignmentStart); Assert.assertEquals(read.getSoftStart(), softStart); Assert.assertEquals(read.getAlignmentStart(), alignmentStart); Assert.assertEquals(read.getCigarString(), cigar); final GATKSAMRecord reverted = ReadClipper.revertSoftClippedBases(read); final int expectedAlignmentStart = 1; final String expectedCigar = (1 - softStart) + "H" + read.getAlignmentEnd() + "M"; Assert.assertEquals(reverted.getSoftStart(), expectedAlignmentStart); Assert.assertEquals(reverted.getAlignmentStart(), expectedAlignmentStart); Assert.assertEquals(reverted.getCigarString(), expectedCigar); }
@Test public void testSecondaryAlignmentsDoNotCauseAccidentalRemovalOfMate() { final List<GATKSAMRecord> properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 530, 1594, true, false); final GATKSAMRecord read1 = properReads.get(0); read1.setFlags(99); // first in proper pair, mate negative strand final GATKSAMRecord read2Primary = properReads.get(1); read2Primary.setFlags(147); // second in pair, mate unmapped, not primary alignment read2Primary.setAlignmentStart(1596); // move the read final GATKSAMRecord read2NonPrimary = new GATKSAMRecord(read2Primary); read2NonPrimary.setReadName("foo"); read2NonPrimary.setFlags(393); // second in proper pair, on reverse strand read2NonPrimary.setAlignmentStart(451); read2NonPrimary.setMateAlignmentStart(451); final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(null, genomeLocParser, 10000, 200, 10000); manager.addRead(read2NonPrimary, false, false); manager.addRead(read1, false, false); for ( int i = 0; i < ConstrainedMateFixingManager.EMIT_FREQUENCY; i++ ) manager.addRead(ArtificialSAMUtils.createArtificialRead(header, "foo" + i, 0, 1500, 10), false, false); Assert.assertTrue(manager.forMateMatching.containsKey("foo")); }
record.setReadName(name); record.setReferenceIndex(refIndex); record.setAlignmentStart(alignmentStart); List<CigarElement> elements = new ArrayList<CigarElement>(); elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
@Test public void testUnmappedReadsDoNotFail() { // create an unmapped read final GATKSAMRecord read = new GATKSAMRecord(ArtificialSAMUtils.createArtificialSamHeader()); read.setReadName("foo"); read.setReferenceName("*"); read.setAlignmentStart(100); read.setCigarString("*"); read.setReadUnmappedFlag(true); // try to add it to the manager final OverhangFixingManager manager = new OverhangFixingManager(null, null, null, 100, 1, 30, false); manager.addRead(read); // we just want to make sure that the following call does not fail Assert.assertTrue(true); } }
@Test public void testSecondaryAlignmentsDoNotInterfere() { final List<GATKSAMRecord> properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 10, 30, true, false); final GATKSAMRecord read1 = properReads.get(0); read1.setAlignmentStart(8); // move the read read1.setFlags(99); // first in proper pair, mate negative strand final GATKSAMRecord read2Primary = properReads.get(1); read2Primary.setFlags(147); // second in pair, mate unmapped, not primary alignment Assert.assertEquals(read1.getInferredInsertSize(), 21); final GATKSAMRecord read2NonPrimary = new GATKSAMRecord(read2Primary); read2NonPrimary.setFlags(393); // second in proper pair, on reverse strand final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(null, genomeLocParser, 1000, 1000, 1000); manager.addRead(read1, true, false); manager.addRead(read2NonPrimary, false, false); manager.addRead(read2Primary, false, false); Assert.assertEquals(manager.getNReadsInQueue(), 3); for ( final SAMRecord read : manager.getReadsInQueueForTesting() ) { if ( read.getFirstOfPairFlag() ) { Assert.assertEquals(read.getFlags(), 99); Assert.assertEquals(read.getInferredInsertSize(), 23); } else if ( read.getNotPrimaryAlignmentFlag() ) { Assert.assertEquals(read.getFlags(), 393); Assert.assertEquals(read.getInferredInsertSize(), -21); } else { Assert.assertEquals(read.getFlags(), 147); Assert.assertEquals(read.getInferredInsertSize(), -23); } } }
hardClippedRead.setCigar(cigarShift.cigar); if (start == 0) hardClippedRead.setAlignmentStart(read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), cigarShift.cigar));
final int readStartOnHaplotype = calcFirstBaseMatchingReferenceInCigar(extendedHaplotypeCigar, swPairwiseAlignment.getAlignmentStart2wrt1()); final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype; read.setAlignmentStart(readStartOnReference); read.resetSoftStartAndEnd();
read2Supp.setReadName("foo"); read2Supp.setFlags(2209); // second in pair, mate negative strand, supplementary read2Supp.setAlignmentStart(100); read2Supp.setMateAlignmentStart(1000);
protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) { GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(dictionary.getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); record.setCigarString("1M"); record.setReadString("A"); record.setBaseQualityString("A"); record.setReadGroup(readGroup); return record; }
/** * Creates and writes an artificial read given the appropriate data * * @param readBases the bases * @param contig the contig * @param start the read start * @param cigar the cigar string * @param sample the sample name (used to get the right read group) * @param isNegStrand should this read be on the negative strand? */ private void writeRead(final byte[] readBases, final String contig, final int start, final String cigar, final String sample, final boolean isNegStrand) { final GATKSAMRecord read = new GATKSAMRecord(header); read.setBaseQualities(readQuals); read.setReadBases(readBases); read.setReadName("" + readNameCounter++); read.setCigarString(cigar); read.setReadPairedFlag(false); read.setAlignmentStart(start); read.setMappingQuality(60); read.setReferenceName(contig); read.setReadNegativeStrandFlag(isNegStrand); read.setAttribute("RG", sampleRG(sample).getReadGroupId()); readWriter.addAlignment(read); }
read.setReadUnmappedFlag(false); read.setMateUnmappedFlag(false); read.setAlignmentStart(100); read.setCigarString("50M"); read.setMateAlignmentStart(130); bad.setAlignmentStart(1000); tests.add( new Object[]{ "positve read starts after mate end", bad, false });
read.setAlignmentStart(readStart); read.setMappingQuality(artificialMappingQuality); read.setReferenceName(loc.getContig());
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); header.setSequenceDictionary(dictionary); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(dictionary.getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); record.setReadString(new String(new char[len]).replace("\0", "A")); record.setBaseQualities(new byte[len]); record.setReadGroup(new GATKSAMReadGroupRecord(header.getReadGroup("test"))); return record; }
private GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) { GATKSAMRecord unclipped = (GATKSAMRecord) read.clone(); Cigar unclippedCigar = new Cigar(); int matchesCount = 0; for (CigarElement element : read.getCigar().getCigarElements()) { if (element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.MATCH_OR_MISMATCH) matchesCount += element.getLength(); else if (matchesCount > 0) { unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH)); matchesCount = 0; unclippedCigar.add(element); } else unclippedCigar.add(element); } if (matchesCount > 0) unclippedCigar.add(new CigarElement(matchesCount, CigarOperator.MATCH_OR_MISMATCH)); unclipped.setCigar(unclippedCigar); final int newStart = read.getAlignmentStart() + calculateAlignmentStartShift(read.getCigar(), unclippedCigar); unclipped.setAlignmentStart(newStart); if ( newStart <= 0 ) { // if the start of the unclipped read occurs before the contig, // we must hard clip away the bases since we cannot represent reads with // negative or 0 alignment start values in the SAMRecord (e.g., 0 means unaligned) return hardClip(unclipped, 0, - newStart); } else { return unclipped; } }
final GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader()); emptyRead.setReferenceIndex(read.getReferenceIndex()); emptyRead.setAlignmentStart(0); emptyRead.setMappingQuality(0);
/** * Write out a representation of this haplotype as a read * * @param haplotype a haplotype to write out. Cannot be null * @param paddedRefLoc the reference location. Cannot be null * @param isAmongBestHaplotypes true if among the best haplotypes, false if it was just one possible but not so good */ private void writeHaplotype(final Haplotype haplotype, final GenomeLoc paddedRefLoc, final boolean isAmongBestHaplotypes) { final GATKSAMRecord record = new GATKSAMRecord(output.getHeader()); record.setReadBases(haplotype.getBases()); record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef()); record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length)); record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar())); record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0); record.setReadName("HC" + uniqueNameCounter++); record.setAttribute(AlignmentUtils.HAPLOTYPE_TAG,haplotype.hashCode()); record.setReadUnmappedFlag(false); record.setReferenceIndex(paddedRefLoc.getContigIndex()); record.setAttribute(SAMTag.RG.toString(), READ_GROUP_ID); record.setFlags(16); output.add(record); }
/** * Build a SAM record featuring the absolute minimum required dataset. * * @param contig Contig to populate. * @param alignmentStart start of alignment * @param alignmentEnd end of alignment * * @return New SAM Record */ protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary()); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); record.setReadBases(new byte[len]); record.setBaseQualities(new byte[len]); return record; }
private GATKSAMRecord createReadOffContig(final SAMFileHeader header, final boolean negStrand, final int pre, final int post) { final int contigLen = header.getSequence(0).getSequenceLength(); final int readLen = pre + contigLen + post; final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, readLen); read.setAlignmentStart(1); read.setCigar(TextCigarCodec.decode(pre + "S" + contigLen + "M" + post + "S")); read.setBaseQualities(Utils.dupBytes((byte) 30, readLen)); read.setReadBases(Utils.dupBytes((byte)'A', readLen)); read.setMappingQuality(60); read.setMateAlignmentStart(1); read.setProperPairFlag(true); read.setReadPairedFlag(true); read.setInferredInsertSize(30); read.setReadNegativeStrandFlag(negStrand); read.setMateNegativeStrandFlag(! negStrand); read.setReadGroup(new GATKSAMReadGroupRecord("foo")); return read; }