protected static GATKSAMRecord fixMisencodedQuals(final GATKSAMRecord read) { final byte[] quals = read.getBaseQualities(); for ( int i = 0; i < quals.length; i++ ) { quals[i] -= encodingFixValue; if ( quals[i] < 0 ) throw new UserException.BadInput("while fixing mis-encoded base qualities we encountered a read that was correctly encoded; we cannot handle such a mixture of reads so unfortunately the BAM must be fixed with some other tool"); } read.setBaseQualities(quals); return read; }
/** * Clears all attributes except ReadGroup of the read. */ public GATKSAMRecord simplify () { GATKSAMReadGroupRecord rg = getReadGroup(); // save the read group information byte[] insQuals = (this.getAttribute(BQSR_BASE_INSERTION_QUALITIES) == null) ? null : getBaseInsertionQualities(); byte[] delQuals = (this.getAttribute(BQSR_BASE_DELETION_QUALITIES) == null) ? null : getBaseDeletionQualities(); this.clearAttributes(); // clear all attributes from the read this.setReadGroup(rg); // restore read group if (insQuals != null) this.setBaseQualities(insQuals, EventType.BASE_INSERTION); // restore base insertion if we had any if (delQuals != null) this.setBaseQualities(delQuals, EventType.BASE_DELETION); // restore base deletion if we had any return this; }
@DataProvider(name = "IndelLengthAndBasesTest") public Object[][] makeIndelLengthAndBasesTest() { final String EVENT_BASES = "ACGTACGTACGT"; final List<Object[]> tests = new LinkedList<Object[]>(); for ( int eventSize = 1; eventSize < 10; eventSize++ ) { for ( final CigarOperator indel : Arrays.asList(CigarOperator.D, CigarOperator.I) ) { final String cigar = String.format("2M%d%s1M", eventSize, indel.toString()); final String eventBases = indel == CigarOperator.D ? "" : EVENT_BASES.substring(0, eventSize); final int readLength = 3 + eventBases.length(); GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength); read.setReadBases(("TT" + eventBases + "A").getBytes()); final byte[] quals = new byte[readLength]; for ( int i = 0; i < readLength; i++ ) quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE); read.setBaseQualities(quals); read.setCigarString(cigar); tests.add(new Object[]{read, indel, eventSize, eventBases.equals("") ? null : eventBases}); } } return tests.toArray(new Object[][]{}); }
/** * Setters and Accessors for base insertion and base deletion quality scores */ public void setBaseQualities( final byte[] quals, final EventType errorModel ) { switch( errorModel ) { case BASE_SUBSTITUTION: setBaseQualities(quals); break; case BASE_INSERTION: setAttribute( GATKSAMRecord.BQSR_BASE_INSERTION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) ); break; case BASE_DELETION: setAttribute( GATKSAMRecord.BQSR_BASE_DELETION_QUALITIES, quals == null ? null : SAMUtils.phredToFastq(quals) ); break; default: throw new ReviewedGATKException("Unrecognized Base Recalibration type: " + errorModel ); } }
@DataProvider(name = "PrevAndNextTest") public Object[][] makePrevAndNextTest() { final List<Object[]> tests = new LinkedList<Object[]>(); final List<CigarOperator> operators = Arrays.asList(CigarOperator.I, CigarOperator.P, CigarOperator.S); for ( final CigarOperator firstOp : Arrays.asList(CigarOperator.M) ) { for ( final CigarOperator lastOp : Arrays.asList(CigarOperator.M, CigarOperator.D) ) { for ( final int nIntermediate : Arrays.asList(1, 2, 3) ) { for ( final List<CigarOperator> combination : Utils.makePermutations(operators, nIntermediate, false) ) { final int readLength = 2 + combination.size(); GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength); read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); read.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); String cigar = "1" + firstOp; for ( final CigarOperator op : combination ) cigar += "1" + op; cigar += "1" + lastOp; read.setCigarString(cigar); tests.add(new Object[]{read, firstOp, lastOp, combination}); } } } } return tests.toArray(new Object[][]{}); }
private GATKSAMRecord modifyBaseQualities(final GATKSAMRecord read, final int startOffset, final int length) throws Exception { final GATKSAMRecord readWithLowQuals = (GATKSAMRecord)read.clone(); final byte[] withLowQuals = Arrays.copyOf(read.getBaseQualities(), read.getBaseQualities().length); for ( int i = startOffset; i < startOffset + length; i++ ) withLowQuals[i] = (byte)(read.getBaseQualities()[i] + (i % 2 == 0 ? -1 : 0)); readWithLowQuals.setBaseQualities(withLowQuals); return readWithLowQuals; }
@Test(enabled = !DEBUG, dataProvider = "SoftClipsDataProvider") public void testSoftClipsData(final byte[] qualsOfSoftClipsOnLeft, final int middleSize, final String middleOp, final byte[] qualOfSoftClipsOnRight, final int qualThreshold, final int numExpected) { final int readLength = (middleOp.equals("D") ? 0 : middleSize) + qualOfSoftClipsOnRight.length + qualsOfSoftClipsOnLeft.length; final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLength); final byte[] bases = Utils.dupBytes((byte) 'A', readLength); final byte[] matchBytes = middleOp.equals("D") ? new byte[]{} : Utils.dupBytes((byte)30, middleSize); final byte[] quals = ArrayUtils.addAll(ArrayUtils.addAll(qualsOfSoftClipsOnLeft, matchBytes), qualOfSoftClipsOnRight); // set the read's bases and quals read.setReadBases(bases); read.setBaseQualities(quals); final StringBuilder cigar = new StringBuilder(); if (qualsOfSoftClipsOnLeft.length > 0 ) cigar.append(qualsOfSoftClipsOnLeft.length + "S"); if (middleSize > 0 ) cigar.append(middleSize + middleOp); if (qualOfSoftClipsOnRight.length > 0 ) cigar.append(qualOfSoftClipsOnRight.length + "S"); read.setCigarString(cigar.toString()); final int actual = AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) qualThreshold); Assert.assertEquals(actual, numExpected, "Wrong number of soft clips detected for read " + read.getSAMString()); }
/** * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read * * @param header the SAM header to associate the read with * @param name the name of the read * @param refIndex the reference index, i.e. what chromosome to associate it with * @param alignmentStart where to start the alignment * @param bases the sequence of the read * @param qual the qualities of the read * @return the artificial read */ public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual) { if (bases.length != qual.length) { throw new ReviewedGATKException("Passed in read string is different length then the quality array"); } GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); rec.setReadBases(bases); rec.setBaseQualities(qual); rec.setReadGroup(new GATKSAMReadGroupRecord("x")); if (refIndex == -1) { rec.setReadUnmappedFlag(true); } return rec; }
@Test (enabled = true) public void testReadWithNsRefIndexInDeletion() throws FileNotFoundException { final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary()); final int readLength = 76; final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 8975, readLength); read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); read.setCigarString("3M414N1D73M"); final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9392, ReadUtils.ClippingTail.LEFT_TAIL); Assert.assertEquals(result, 2); }
@Test (enabled = true) public void testReadWithNsRefAfterDeletion() throws FileNotFoundException { final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary()); final int readLength = 76; final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 8975, readLength); read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); read.setCigarString("3M414N1D73M"); final int result = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, 9393, ReadUtils.ClippingTail.LEFT_TAIL); Assert.assertEquals(result, 3); }
private GATKSAMRecord makeRead(final String baseString) { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, 10); final byte[] bases = baseString.getBytes(); read.setReadBases(bases.clone()); read.setBaseQualities(Utils.dupBytes((byte)30, read.getReadLength())); return read; }
public GATKSAMRecord makeRead() { GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, readLength); read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); final byte[] quals = new byte[readLength]; for ( int i = 0; i < readLength; i++ ) quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE); read.setBaseQualities(quals); read.setCigarString(cigarString); return read; } }
private GATKSAMRecord makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases, final byte[] overlapQuals, final String rightFlank, final int rightQual, final int alignmentStart) { final String bases = leftFlank + overlapBases + rightFlank; final int readLength = bases.length(); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, alignmentStart, readLength); final byte[] leftQuals = Utils.dupBytes((byte) leftQual, leftFlank.length()); final byte[] rightQuals = Utils.dupBytes((byte) rightQual, rightFlank.length()); final byte[] quals = Utils.concat(leftQuals, overlapQuals, rightQuals); read.setCigarString(readLength + "M"); read.setReadBases(bases.getBytes()); for ( final EventType type : EventType.values() ) read.setBaseQualities(quals, type); read.setReadGroup(rgForMerged); read.setMappingQuality(60); return read; }
@Test public void testRBPMappingQuals() { // create a read with high MQ final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 10); read.setReadBases(Utils.dupBytes((byte) 'A', 10)); read.setBaseQualities(Utils.dupBytes((byte) 30, 10)); read.setCigarString("10M"); read.setMappingQuality(200); // set a MQ higher than max signed byte // now create the RBP final List<PileupElement> elts = new LinkedList<>(); elts.add(new PileupElement(read, 0, read.getCigar().getCigarElement(0), 0, 0)); final Map<String, ReadBackedPileupImpl> pileupsBySample = new HashMap<>(); pileupsBySample.put("foo", new ReadBackedPileupImpl(loc, elts)); final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, pileupsBySample); Assert.assertEquals(pileup.getMappingQuals()[0], 200); } }
private List<PileupElement> makeReads( final int n, final int mapq, final String op ) { final int readLength = 3; final List<PileupElement> elts = new LinkedList<PileupElement>(); for ( int i = 0; i < n; i++ ) { GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength); read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); read.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); read.setCigarString("1M1" + op + "1M"); read.setMappingQuality(mapq); final int baseOffset = op.equals("M") ? 1 : 0; final CigarElement cigarElement = read.getCigar().getCigarElement(1); elts.add(new PileupElement(read, baseOffset, cigarElement, 1, 0)); } return elts; }
@Test(enabled = false && ! DEBUG) public void testWholeIndelReadInIsolation() { final int firstLocus = 44367789; GATKSAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76); indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76)); indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76)); indelOnlyRead.setCigarString("76I"); List<GATKSAMRecord> reads = Arrays.asList(indelOnlyRead); // create the iterator by state with the fake reads and fake records li = makeLTBS(reads); // Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read // and considers it to be an indel-containing read. Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled"); AlignmentContext alignmentContext = li.next(); Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location."); ReadBackedPileup basePileup = alignmentContext.getBasePileup(); Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size"); Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect"); }
@Override protected void setUp() { header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); for ( int j = 0; j < nReads; j++ ) { GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, readLength); read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); final byte[] quals = new byte[readLength]; for ( int i = 0; i < readLength; i++ ) quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE); read.setBaseQualities(quals); read.setCigarString(cigar); reads.add(read); } }
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); header.setSequenceDictionary(dictionary); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(dictionary.getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); record.setReadString(new String(new char[len]).replace("\0", "A")); record.setBaseQualities(new byte[len]); record.setReadGroup(new GATKSAMReadGroupRecord(header.getReadGroup("test"))); return record; }
/** * Build a SAM record featuring the absolute minimum required dataset. * * @param contig Contig to populate. * @param alignmentStart start of alignment * @param alignmentEnd end of alignment * * @return New SAM Record */ protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary()); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); record.setReadBases(new byte[len]); record.setBaseQualities(new byte[len]); return record; }
private GATKSAMRecord createReadOffContig(final SAMFileHeader header, final boolean negStrand, final int pre, final int post) { final int contigLen = header.getSequence(0).getSequenceLength(); final int readLen = pre + contigLen + post; final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, readLen); read.setAlignmentStart(1); read.setCigar(TextCigarCodec.decode(pre + "S" + contigLen + "M" + post + "S")); read.setBaseQualities(Utils.dupBytes((byte) 30, readLen)); read.setReadBases(Utils.dupBytes((byte)'A', readLen)); read.setMappingQuality(60); read.setMateAlignmentStart(1); read.setProperPairFlag(true); read.setReadPairedFlag(true); read.setInferredInsertSize(30); read.setReadNegativeStrandFlag(negStrand); read.setMateNegativeStrandFlag(! negStrand); read.setReadGroup(new GATKSAMReadGroupRecord("foo")); return read; }