@Test(dataProvider = "MostLikelyAlleleData") public void testMostLikelyAllele(final List<Allele> alleles, final List<List<Double>> perReadlikelihoods, final Allele best, final Allele second) { final PerReadAlleleLikelihoodMap map = new PerReadAlleleLikelihoodMap(); for ( int readI = 0; readI < perReadlikelihoods.size(); readI++ ) { final List<Double> likelihoods = perReadlikelihoods.get(readI); final byte[] bases = Utils.dupBytes((byte)'A', 10); final byte[] quals = Utils.dupBytes((byte) 30, 10); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "10M"); read.setReadName("readName" + readI); for ( int i = 0; i < alleles.size(); i++ ) { final Allele allele = alleles.get(i); final double likelihood = likelihoods.get(i); map.add(read, allele, likelihood); } } final MostLikelyAllele mla = map.getMostLikelyDiploidAlleles(); Assert.assertEquals(mla.getMostLikelyAllele(), best); Assert.assertEquals(mla.getSecondMostLikelyAllele(), second); } }
@Test(dataProvider = "RemovingPoorlyModelledReadData") public void testRemovingPoorlyModelledReads(final int readLen, final int nReads, final int nBad, final int nGood) { final PerReadAlleleLikelihoodMap map = new PerReadAlleleLikelihoodMap(); final Set<GATKSAMRecord> goodReads = new HashSet<GATKSAMRecord>(); final Set<GATKSAMRecord> badReads = new HashSet<GATKSAMRecord>(); for ( int readI = 0; readI < nReads; readI++ ) { final boolean bad = readI < nBad; final double likelihood = bad ? -100.0 : 0.0; final byte[] bases = Utils.dupBytes((byte)'A', readLen); final byte[] quals = Utils.dupBytes((byte) 40, readLen); final Allele allele = Allele.create(Utils.dupString("A", readI+1)); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, readLen + "M"); read.setReadName("readName" + readI); map.add(read, allele, likelihood); (bad ? badReads : goodReads).add(read); } final List<GATKSAMRecord> removedReads = map.filterPoorlyModelledReads(0.01); Assert.assertEquals(removedReads.size(), nBad, "nBad " + nBad + " nGood " + nGood); Assert.assertEquals(new HashSet<GATKSAMRecord>(removedReads), badReads, "nBad " + nBad + " nGood " + nGood); Assert.assertEquals(map.size(), nGood, "nBad " + nBad + " nGood " + nGood); Assert.assertTrue(map.getStoredElements().containsAll(goodReads), "nBad " + nBad + " nGood " + nGood); Assert.assertEquals(map.getStoredElements().size(), nGood, "nBad " + nBad + " nGood " + nGood); }
throw new ReviewedGATKException("Invalid alignment start for artificial read, start = " + alignmentStart); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(name); record.setReferenceIndex(refIndex); record.setAlignmentStart(alignmentStart);
@Test public void testUnmappedReadsDoNotFail() { // create an unmapped read final GATKSAMRecord read = new GATKSAMRecord(ArtificialSAMUtils.createArtificialSamHeader()); read.setReadName("foo"); read.setReferenceName("*"); read.setAlignmentStart(100); read.setCigarString("*"); read.setReadUnmappedFlag(true); // try to add it to the manager final OverhangFixingManager manager = new OverhangFixingManager(null, null, null, 100, 1, 30, false); manager.addRead(read); // we just want to make sure that the following call does not fail Assert.assertTrue(true); } }
read2.setReadName(read.getReadName() + ".clone");
@Test public void testSecondaryAlignmentsDoNotCauseAccidentalRemovalOfMate() { final List<GATKSAMRecord> properReads = ArtificialSAMUtils.createPair(header, "foo", 1, 530, 1594, true, false); final GATKSAMRecord read1 = properReads.get(0); read1.setFlags(99); // first in proper pair, mate negative strand final GATKSAMRecord read2Primary = properReads.get(1); read2Primary.setFlags(147); // second in pair, mate unmapped, not primary alignment read2Primary.setAlignmentStart(1596); // move the read final GATKSAMRecord read2NonPrimary = new GATKSAMRecord(read2Primary); read2NonPrimary.setReadName("foo"); read2NonPrimary.setFlags(393); // second in proper pair, on reverse strand read2NonPrimary.setAlignmentStart(451); read2NonPrimary.setMateAlignmentStart(451); final ConstrainedMateFixingManager manager = new ConstrainedMateFixingManager(null, genomeLocParser, 10000, 200, 10000); manager.addRead(read2NonPrimary, false, false); manager.addRead(read1, false, false); for ( int i = 0; i < ConstrainedMateFixingManager.EMIT_FREQUENCY; i++ ) manager.addRead(ArtificialSAMUtils.createArtificialRead(header, "foo" + i, 0, 1500, 10), false, false); Assert.assertTrue(manager.forMateMatching.containsKey("foo")); }
/** * Creates a hard-clipped view on a existing read record. * @param read the underlying unclipped read. * @param start inclusive first position in {@code read} included in the clipped view. * @param end inclusive last position in {@code read} included in the clipped view. */ public ClippedGATKSAMRecord(final GATKSAMRecord read, int start, int end) { super(read.getHeader()); this.setReferenceIndex(read.getReferenceIndex()); this.setAlignmentStart(read.getAlignmentStart() + start); this.setMappingQuality(100); // setting read indexing bin below this.setFlags(read.getFlags()); this.setMateReferenceIndex(read.getMateReferenceIndex()); this.setMateAlignmentStart(read.getMateAlignmentStart()); this.setInferredInsertSize(read.getInferredInsertSize()); this.setReadBases(Arrays.copyOfRange(read.getReadBases(), start, end)); this.setBaseQualities(Arrays.copyOfRange(read.getBaseQualities(),start,end)); this.setReadName(read.getReadName()); insertionQuals = Arrays.copyOfRange(read.getBaseInsertionQualities(),start,end); deletionQuals = Arrays.copyOfRange(read.getBaseDeletionQualities(),start,end); // Set these to null in order to mark them as being candidates for lazy initialization. // If this is not done, they will have non-null defaults. super.setReadName(null); super.setCigarString(null); super.setReadBases(null); super.setBaseQualities(null); // Do this after the above because setCigarString will clear it. GATKBin.setReadIndexingBin(this, -1); }
read2Supp.setReadName("foo"); read2Supp.setFlags(2209); // second in pair, mate negative strand, supplementary read2Supp.setAlignmentStart(100);
protected GATKSAMRecord buildSAMRecord(final String readName, final String contig, final int alignmentStart) { GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(dictionary.getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); record.setCigarString("1M"); record.setReadString("A"); record.setBaseQualityString("A"); record.setReadGroup(readGroup); return record; }
/** * Creates and writes an artificial read given the appropriate data * * @param readBases the bases * @param contig the contig * @param start the read start * @param cigar the cigar string * @param sample the sample name (used to get the right read group) * @param isNegStrand should this read be on the negative strand? */ private void writeRead(final byte[] readBases, final String contig, final int start, final String cigar, final String sample, final boolean isNegStrand) { final GATKSAMRecord read = new GATKSAMRecord(header); read.setBaseQualities(readQuals); read.setReadBases(readBases); read.setReadName("" + readNameCounter++); read.setCigarString(cigar); read.setReadPairedFlag(false); read.setAlignmentStart(start); read.setMappingQuality(60); read.setReferenceName(contig); read.setReadNegativeStrandFlag(isNegStrand); read.setAttribute("RG", sampleRG(sample).getReadGroupId()); readWriter.addAlignment(read); }
read.setBaseQualities(readQuals); read.setReadBases(readBases); read.setReadName(artificialReadName+readCounter++);
returnRead.setReadGroup( firstRead.getReadGroup() ); returnRead.setReferenceName( firstRead.getReferenceName() ); returnRead.setReadName( firstRead.getReadName() ); final CigarElement c = new CigarElement(bases.length, CigarOperator.M); final ArrayList<CigarElement> cList = new ArrayList<CigarElement>();
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test"); header.setSequenceDictionary(dictionary); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(dictionary.getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); record.setReadString(new String(new char[len]).replace("\0", "A")); record.setBaseQualities(new byte[len]); record.setReadGroup(new GATKSAMReadGroupRecord(header.getReadGroup("test"))); return record; }
/** * Write out a representation of this haplotype as a read * * @param haplotype a haplotype to write out. Cannot be null * @param paddedRefLoc the reference location. Cannot be null * @param isAmongBestHaplotypes true if among the best haplotypes, false if it was just one possible but not so good */ private void writeHaplotype(final Haplotype haplotype, final GenomeLoc paddedRefLoc, final boolean isAmongBestHaplotypes) { final GATKSAMRecord record = new GATKSAMRecord(output.getHeader()); record.setReadBases(haplotype.getBases()); record.setAlignmentStart(paddedRefLoc.getStart() + haplotype.getAlignmentStartHapwrtRef()); record.setBaseQualities(Utils.dupBytes((byte) '!', haplotype.getBases().length)); record.setCigar(AlignmentUtils.consolidateCigar(haplotype.getCigar())); record.setMappingQuality(isAmongBestHaplotypes ? 60 : 0); record.setReadName("HC" + uniqueNameCounter++); record.setAttribute(AlignmentUtils.HAPLOTYPE_TAG,haplotype.hashCode()); record.setReadUnmappedFlag(false); record.setReferenceIndex(paddedRefLoc.getContigIndex()); record.setAttribute(SAMTag.RG.toString(), READ_GROUP_ID); record.setFlags(16); output.add(record); }
/** * Build a SAM record featuring the absolute minimum required dataset. * * @param contig Contig to populate. * @param alignmentStart start of alignment * @param alignmentEnd end of alignment * * @return New SAM Record */ protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) { SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(sequenceSourceFile.getSequenceDictionary()); GATKSAMRecord record = new GATKSAMRecord(header); record.setReadName(readName); record.setReferenceIndex(sequenceSourceFile.getSequenceDictionary().getSequenceIndex(contig)); record.setAlignmentStart(alignmentStart); Cigar cigar = new Cigar(); int len = alignmentEnd - alignmentStart + 1; cigar.add(new CigarElement(len, CigarOperator.M)); record.setCigar(cigar); record.setReadBases(new byte[len]); record.setBaseQualities(new byte[len]); return record; }