/**
 * Given a read, outputs the read bases in a string format.
 *
 * @param read the read
 * @return an upper-cased string representation of the read bases
 */
public static String convertReadBasesToString(GATKSAMRecord read) {
    final byte[] readBases = read.getReadBases();
    // Use a StringBuilder rather than repeated String concatenation,
    // which is O(n^2) because each += copies the whole accumulated string.
    final StringBuilder bases = new StringBuilder(readBases.length);
    for (final byte b : readBases) {
        bases.append((char) b);
    }
    return bases.toString().toUpperCase();
}
/**
 * Returns the reverse complement of the read bases.
 *
 * @param read the read
 * @return the reverse complement of the read's bases
 */
public static String getBasesReverseComplement(GATKSAMRecord read) {
    // Delegate to the byte[]-based overload
    final byte[] bases = read.getReadBases();
    return getBasesReverseComplement(bases);
}
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
    // Tally each A/C/G/T base observed in this read; any other base code is ignored.
    for (final byte base : read.getReadBases()) {
        if (base == 'A') {
            As++;
        } else if (base == 'C') {
            Cs++;
        } else if (base == 'G') {
            Gs++;
        } else if (base == 'T') {
            Ts++;
        }
    }
    return 1;
}
/**
 * Get the base aligned to the genome at this location.
 *
 * If the current element is a deletion, returns DELETION_BASE.
 *
 * @return a base encoded as a byte
 */
@Ensures("result != DELETION_BASE || (isDeletion() && result == DELETION_BASE)")
public byte getBase() {
    if (isDeletion())
        return DELETION_BASE;
    return read.getReadBases()[offset];
}
/**
 * Returns the context of the given size ending at (and including) the given offset,
 * or null when the context would run off the start of the read or contains an N.
 */
public static String expectedContext(GATKSAMRecord read, int offset, int contextSize) {
    final String bases = stringFrom(read.getReadBases());
    final int contextStart = offset - contextSize + 1;
    // Not enough bases before the offset to form a full context window
    if (contextStart < 0)
        return null;
    final String context = bases.substring(contextStart, offset + 1);
    // Contexts containing ambiguous (N) bases are rejected
    return context.contains("N") ? null : context;
}
/**
 * Queues every read against the full haplotype set for batched likelihood computation.
 *
 * @param reads      the reads to add
 * @param haplotypes the haplotypes each read is evaluated against
 * @param gcp        per-read overall gap continuation penalties
 */
private void performBatchAdditions(final List<GATKSAMRecord> reads, final List<Haplotype> haplotypes, Map<GATKSAMRecord,byte[]> gcp) {
    for (final GATKSAMRecord read : reads) {
        batchAdd(haplotypes,
                 read.getReadBases(),
                 read.getBaseQualities(),
                 read.getBaseInsertionQualities(),
                 read.getBaseDeletionQualities(),
                 gcp.get(read));
    }
}
/**
 * Main entry routine to add all kmers in a read to the read map counter.
 *
 * @param read Read to add bases
 */
@Requires("read != null")
protected void addReadKmers(final GATKSAMRecord read) {
    // When configured, skip kmer counting entirely inside regions with long homopolymer runs
    if (DONT_CORRECT_IN_LONG_HOMOPOLYMERS && maxHomopolymerLengthInRegion > MAX_HOMOPOLYMER_THRESHOLD)
        return;

    final byte[] bases = read.getReadBases();
    // Slide a window of kmerLength across the read; the last valid start is length - kmerLength
    final int lastStart = bases.length - kmerLength;
    for (int start = 0; start <= lastStart; start++) {
        countsByKMer.addKmer(new Kmer(bases, start, kmerLength), 1);
    }
}
/**
 * Get the bases for an insertion that immediately follows this alignment state, or null if none exists.
 *
 * @see #getLengthOfImmediatelyFollowingIndel() for details on the meaning of immediately.
 *
 * If the immediately following state isn't an insertion, returns null.
 *
 * @return actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read.
 */
@Ensures("result == null || result.length() == getLengthOfImmediatelyFollowingIndel()")
public String getBasesOfImmediatelyFollowingInsertion() {
    final CigarElement element = getNextIndelCigarElement();
    // Only an insertion element yields bases; deletions and absent events return null
    if (element == null || element.getOperator() != CigarOperator.I)
        return null;
    // The inserted bases start right after the current offset in the read
    final int start = offset + 1;
    final byte[] insertedBases = Arrays.copyOfRange(read.getReadBases(), start, start + element.getLength());
    return new String(insertedBases);
}
/**
 * Writes one debug line per read/haplotype pair: haplotype bases, read bases,
 * three FASTQ-encoded quality tracks, the constant GCP track, and the log10 likelihood.
 */
private void writeDebugLikelihoods(final GATKSAMRecord processedRead, final Haplotype haplotype, final double log10l) {
    final String readBases = new String(processedRead.getReadBases());
    final String baseQuals = SAMUtils.phredToFastq(processedRead.getBaseQualities());
    final String insQuals = SAMUtils.phredToFastq(processedRead.getBaseInsertionQualities());
    final String delQuals = SAMUtils.phredToFastq(processedRead.getBaseDeletionQualities());
    final String gcpTrack = SAMUtils.phredToFastq(constantGCP);
    likelihoodsStream.printf("%s %s %s %s %s %s %f%n", haplotype.getBaseString(), readBases, baseQuals, insQuals, delQuals, gcpTrack, log10l);
}
/**
 * Loads the read that is going to be evaluated in following calls to {@link #calculateLocalLikelihoods}.
 *
 * @param read the target read.
 * @throws NullPointerException if {@code read} is null.
 */
@Override
public void loadRead(final GATKSAMRecord read) {
    // Unpack the read into the primitive arrays expected by the array-based overload
    final byte[] bases = read.getReadBases();
    final byte[] baseQuals = read.getBaseQualities();
    final byte[] insertionQuals = read.getBaseInsertionQualities();
    final byte[] deletionQuals = read.getBaseDeletionQualities();
    loadRead(bases, baseQuals, insertionQuals, deletionQuals, read.getMappingQuality());
}
/**
 * Builds a per-base skip mask for the read: a position is skipped when the base is
 * irregular (e.g. N), low quality, overlaps a known variant site, or falls at a
 * SOLiD-specific bad offset.
 */
protected boolean[] calculateSkipArray(final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker) {
    final byte[] bases = read.getReadBases();
    final boolean[] knownSites = calculateKnownSites(read, metaDataTracker.getValues(RAC.knownSites));
    final boolean[] skip = new boolean[bases.length];
    for (int i = 0; i < bases.length; i++) {
        skip[i] = !BaseUtils.isRegularBase(bases[i])
                || isLowQualityBase(read, i)
                || knownSites[i]
                || badSolidOffset(read, i);
    }
    return skip;
}
public static GATKSAMRecord combineDuplicates(GenomeLocParser genomeLocParser,List<GATKSAMRecord> duplicates, int maxQScore) { if ( duplicates.size() == 0 ) return null; // make the combined read by copying the first read and setting the // bases and quals to new arrays GATKSAMRecord comb = tmpCopyRead(duplicates.get(0)); //GATKSAMRecord comb = tmpCopyRead(duplicates.get(0)); comb.setDuplicateReadFlag(false); int readLen = comb.getReadBases().length; byte[] bases = new byte[readLen]; byte[] quals = new byte[readLen]; for ( int i = 0; i < readLen; i++ ) { //System.out.printf("I is %d%n", i); //for ( GATKSAMRecord read : duplicates ) { // System.out.printf("dup base %c %d%n", (char)read.getReadBases()[i], read.getBaseQualities()[i]); //} Pair<Byte, Byte> baseAndQual = combineBaseProbs(genomeLocParser,duplicates, i, maxQScore); bases[i] = baseAndQual.getFirst(); quals[i] = baseAndQual.getSecond(); } comb.setBaseQualities(quals); comb.setReadBases(bases); return comb; }
@Test(enabled = true)
public void testGetBasesReverseComplement() {
    final Random random = Utils.getRandomGenerator();
    // Round-trip property: reverse-complementing the reverse complement restores the original bases
    for (int iteration = 0; iteration < 1000; iteration++) {
        final int length = random.nextInt(1000);
        final GATKSAMRecord read = GATKSAMRecord.createRandomRead(length);
        final byte[] original = read.getReadBases();
        final String revComp = ReadUtils.getBasesReverseComplement(read);
        final byte[] reconverted = new byte[length];
        for (int i = 0; i < length; i++) {
            reconverted[length - 1 - i] = BaseUtils.getComplement((byte) revComp.charAt(i));
        }
        Assert.assertEquals(reconverted, original);
    }
}
/**
 * Records the tandem-repeat covariate key for every position of the read.
 *
 * Works on the read bases in 5'-to-3' sequencing orientation (reverse-complemented
 * for negative-strand reads), computes the repeat unit/length at each position,
 * and stores the resulting key at the corresponding original read offset.
 * Reads containing any non-regular base (e.g. N) are skipped entirely.
 */
@Override public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
    // store the original bases and then write Ns over low quality ones
    // NOTE(review): no N-masking is visible in this method body; the clone/restore
    // pair below only matters if something downstream mutates the read's bases — confirm.
    final byte[] originalBases = read.getReadBases().clone();
    final boolean negativeStrand = read.getReadNegativeStrandFlag();
    byte[] bases = read.getReadBases();
    if (negativeStrand)
        // presumably returns a new array rather than mutating in place — verify BaseUtils
        bases = BaseUtils.simpleReverseComplement(bases);
    // don't record reads with N's
    if (!BaseUtils.isAllRegularBases(bases))
        return;
    for (int i = 0; i < bases.length; i++) {
        // Repeat unit and length at this position of the (possibly reversed) bases
        final Pair<byte[], Integer> res = findTandemRepeatUnits(bases, i);
        // to merge repeat unit and repeat length to get covariate value:
        final String repeatID = getCovariateValueFromUnitAndLength(res.first, res.second);
        final int key = keyForRepeat(repeatID);
        // Map back to the original read coordinate for negative-strand reads
        final int readOffset = (negativeStrand ? bases.length - i - 1 : i);
        // Same key is used for all three event types (mismatch/insertion/deletion)
        values.addCovariate(key, key, key, readOffset);
    }
    // put the original bases back in
    read.setReadBases(originalBases);
}
@Test(enabled = !DEBUG, dataProvider = "MergeFragmentsTest") public void testMergingTwoReads(final String name, final GATKSAMRecord read1, final GATKSAMRecord read2, final GATKSAMRecord expectedMerged) { final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); if ( expectedMerged == null ) { Assert.assertNull(actual, "Expected reads not to merge, but got non-null result from merging"); } else { Assert.assertTrue(actual.isStrandless(), "Merged reads should be strandless"); Assert.assertNotNull(actual, "Expected reads to merge, but got null result from merging"); // I really care about the bases, the quals, the CIGAR, and the read group tag Assert.assertEquals(actual.getCigarString(), expectedMerged.getCigarString()); Assert.assertEquals(actual.getReadBases(), expectedMerged.getReadBases()); Assert.assertEquals(actual.getReadGroup(), expectedMerged.getReadGroup()); Assert.assertEquals(actual.getMappingQuality(), expectedMerged.getMappingQuality()); for ( final EventType type : EventType.values() ) Assert.assertEquals(actual.getBaseQualities(type), expectedMerged.getBaseQualities(type), "Failed base qualities for event type " + type); } }
/** * Asserts that the two reads have the same bases, qualities and cigar strings * * @param actual the calculated read * @param expected the expected read */ public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) { // If they're both not empty, test their contents if(!actual.isEmpty() && !expected.isEmpty()) { Assert.assertEquals(actual.getReadBases(), expected.getReadBases()); Assert.assertEquals(actual.getBaseQualities(), expected.getBaseQualities()); Assert.assertEquals(actual.getCigarString(), expected.getCigarString()); } // Otherwise test if they're both empty else Assert.assertEquals(actual.isEmpty(), expected.isEmpty()); } }
@Test(dataProvider = "ReadAlignedToRefData", enabled = true)
public void testReadAlignedToRef(final GATKSAMRecord read, final Haplotype haplotype, final int refStart, final int expectedReadStart, final String expectedReadCigar) throws Exception {
    // Keep a copy so we can verify the input read is not mutated
    final GATKSAMRecord originalReadCopy = (GATKSAMRecord) read.clone();
    if ( expectedReadCigar == null ) {
        Assert.assertNull(AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true));
    } else {
        final Cigar expectedCigar = TextCigarCodec.decode(expectedReadCigar);
        final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, haplotype, refStart, true);
        Assert.assertEquals(alignedRead.getReadName(), originalReadCopy.getReadName());
        Assert.assertEquals(alignedRead.getAlignmentStart(), expectedReadStart);
        Assert.assertEquals(alignedRead.getReadBases(), originalReadCopy.getReadBases());
        Assert.assertEquals(alignedRead.getBaseQualities(), originalReadCopy.getBaseQualities());
        Assert.assertEquals(alignedRead.getCigar(), expectedCigar);
        Assert.assertNotNull(alignedRead.getAttribute("HC"));
    }
    Assert.assertEquals(read, originalReadCopy, "createReadAlignedToRef seems be modifying the original read!");
}
@Test(dataProvider = "ComplexReadAlignedToRef", enabled = !DEBUG)
public void testReadAlignedToRefComplexAlignment(final int testIndex, final GATKSAMRecord read, final String reference, final Haplotype haplotype, final int expectedMaxMismatches) throws Exception {
    final GATKSAMRecord alignedRead = AlignmentUtils.createReadAlignedToRef(read, haplotype, new Haplotype(reference.getBytes(), true), 1, true);
    if ( alignedRead != null ) {
        // Mismatch count is measured against the reference at the read's (0-based) alignment offset
        final int mismatches = AlignmentUtils.getMismatchCount(alignedRead, reference.getBytes(), alignedRead.getAlignmentStart() - 1).numMismatches;
        Assert.assertTrue(mismatches <= expectedMaxMismatches,
                "Alignment of read to ref looks broken. Expected at most " + expectedMaxMismatches + " but saw " + mismatches
                        + " for readBases " + new String(read.getReadBases()) + " with cigar " + read.getCigar() + " reference " + reference
                        + " haplotype " + haplotype + " with cigar " + haplotype.getCigar()
                        + " aligned read cigar " + alignedRead.getCigarString() + " @ " + alignedRead.getAlignmentStart());
    }
}
}
/** * Try to fix the given read using the given split * * @param read the read to fix * @param splice the split (bad region to clip out) */ private void fixSplit(final SplitRead read, final Splice splice) { // if the read doesn't even overlap the split position then we can just exit if ( read.loc == null || !splice.loc.overlapsP(read.loc) ) return; if ( isLeftOverhang(read.loc, splice.loc) ) { final int overhang = splice.loc.getStop() - read.loc.getStart() + 1; if ( overhangingBasesMismatch(read.read.getReadBases(), 0, splice.reference, splice.reference.length - overhang, overhang) ) { final GATKSAMRecord clippedRead = ReadClipper.hardClipByReadCoordinates(read.read, 0, overhang - 1); read.setRead(clippedRead); } } else if ( isRightOverhang(read.loc, splice.loc) ) { final int overhang = read.loc.getStop() - splice.loc.getStart() + 1; if ( overhangingBasesMismatch(read.read.getReadBases(), read.read.getReadLength() - overhang, splice.reference, 0, overhang) ) { final GATKSAMRecord clippedRead = ReadClipper.hardClipByReadCoordinates(read.read, read.read.getReadLength() - overhang, read.read.getReadLength() - 1); read.setRead(clippedRead); } } }
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { // we can not deal with screwy records if ( read.getReadUnmappedFlag() || read.getCigar().numCigarElements() == 0 ) { emit(read); return 0; } // move existing indels (for 1 indel reads only) to leftmost position within identical sequence int numBlocks = AlignmentUtils.getNumAlignmentBlocks(read); if ( numBlocks == 2 ) { Cigar newCigar = AlignmentUtils.leftAlignIndel(IndelRealigner.unclipCigar(read.getCigar()), ref.getBases(), read.getReadBases(), 0, 0, true); newCigar = IndelRealigner.reclipCigar(newCigar, read); read.setCigar(newCigar); } emit(read); return 1; }