boolean readIsTopStrand= !(new ReadFromTopStrandFilter(true)).filterOut(recOff.getRecord()); char readBase= Character.toUpperCase((char)recOff.getReadBase());
@Override public void addInfo(final AbstractLocusInfo<SamLocusIterator.RecordAndOffset> info, final ReferenceSequence ref, boolean referenceBaseN) { if (referenceBaseN) { return; } // Figure out the coverage while not counting overlapping reads twice, and excluding various things final HashSet<String> readNames = new HashSet<>(info.getRecordAndOffsets().size()); int pileupSize = 0; int unfilteredDepth = 0; for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndOffsets()) { if (recs.getBaseQuality() <= 2) { ++basesExcludedByBaseq; continue; } // we add to the base quality histogram any bases that have quality > 2 // the raw depth may exceed the coverageCap before the high-quality depth does. So stop counting once we reach the coverage cap. if (unfilteredDepth < coverageCap) { unfilteredBaseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++; unfilteredDepth++; } if (recs.getBaseQuality() < collectWgsMetrics.MINIMUM_BASE_QUALITY || SequenceUtil.isNoCall(recs.getReadBase())) { ++basesExcludedByBaseq; continue; } if (!readNames.add(recs.getRecord().getReadName())) { ++basesExcludedByOverlap; continue; } pileupSize++; } final int highQualityDepth = Math.min(pileupSize, coverageCap); if (highQualityDepth < pileupSize) basesExcludedByCapping += pileupSize - coverageCap; highQualityDepthHistogramArray[highQualityDepth]++; unfilteredDepthHistogramArray[unfilteredDepth]++; } }
@Override public void addInfo(final AbstractLocusInfo<SamLocusIterator.RecordAndOffset> info, final ReferenceSequence ref, boolean referenceBaseN) { if (referenceBaseN) { return; } // Figure out the coverage while not counting overlapping reads twice, and excluding various things final HashSet<String> readNames = new HashSet<>(info.getRecordAndOffsets().size()); int pileupSize = 0; int unfilteredDepth = 0; for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndOffsets()) { if (recs.getBaseQuality() <= 2) { ++basesExcludedByBaseq; continue; } // we add to the base quality histogram any bases that have quality > 2 // the raw depth may exceed the coverageCap before the high-quality depth does. So stop counting once we reach the coverage cap. if (unfilteredDepth < coverageCap) { unfilteredBaseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++; unfilteredDepth++; } if (recs.getBaseQuality() < collectWgsMetrics.MINIMUM_BASE_QUALITY || SequenceUtil.isNoCall(recs.getReadBase())) { ++basesExcludedByBaseq; continue; } if (!readNames.add(recs.getRecord().getReadName())) { ++basesExcludedByOverlap; continue; } pileupSize++; } final int highQualityDepth = Math.min(pileupSize, coverageCap); if (highQualityDepth < pileupSize) basesExcludedByCapping += pileupSize - coverageCap; highQualityDepthHistogramArray[highQualityDepth]++; unfilteredDepthHistogramArray[unfilteredDepth]++; } }
final SAMRecord samrec = rec.getRecord();
private static Integer stratifyHomopolymerLength(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord()); final byte readBases[] = recordAndOffset.getRecord().getReadBases(); if (SequenceUtil.isNoCall(locusInfo.getReferenceBase())) { return null; } int runLengthOffset = recordAndOffset.getOffset(); if (runLengthOffset < 0 || runLengthOffset >= recordAndOffset.getRecord().getReadLength()) { return null; } if (direction == ReadDirection.POSITIVE) { while (--runLengthOffset >= 0) { if (readBases[runLengthOffset] != readBases[recordAndOffset.getOffset() - 1]) { break; } } return recordAndOffset.getOffset() - runLengthOffset - 1; } else { while (++runLengthOffset < recordAndOffset.getRecord().getReadLength()) { if (readBases[runLengthOffset] != readBases[recordAndOffset.getOffset() + 1]) { break; } } return runLengthOffset - recordAndOffset.getOffset() - 1; } }
@Test public void testSamLocusAndReferenceIterator() { final File reference = new File(TEST_DATA_DIR, "Homo_sapiens_assembly18.trimmed.fasta"); final File samFile = new File(TEST_DATA_DIR, "simpleSmallFile.sam"); final ReferenceSequenceFile referenceSequenceFile = new FastaSequenceFile(reference, false); final ReferenceSequenceFileWalker referenceSequenceFileWalker = new ReferenceSequenceFileWalker(referenceSequenceFile); final SamReader samReader = SamReaderFactory.makeDefault().open(samFile); final SamLocusIterator samLocusIterator = new SamLocusIterator(samReader); final SamLocusAndReferenceIterator samLocusAndReferences = new SamLocusAndReferenceIterator(referenceSequenceFileWalker, samLocusIterator); IntervalList intervalList = new IntervalList(samReader.getFileHeader()); intervalList.add(new Interval("chrM", 1, 36)); intervalList.add(new Interval("chr20", 8401, 8460)); OverlapDetector<Interval> overlapDetector = new OverlapDetector<>(0, 0); overlapDetector.addAll(intervalList.getIntervals(), intervalList.getIntervals()); for (final SamLocusAndReferenceIterator.SAMLocusAndReference samLocusAndReference : samLocusAndReferences) { // The sam file only has coverage in the intervals that are within 'intervalList', and there the coverage should // be exactly 2 since there are two overlapping, paired reads. This is what this test is testing: Assert.assertEquals(samLocusAndReference.getRecordAndOffsets().size(), overlapDetector.overlapsAny(samLocusAndReference.getLocus()) ? 2 : 0, "Position:" + samLocusAndReference.getLocus().toString()); // all the reads are equal to the reference...this is what this test is testing. for (final SamLocusIterator.RecordAndOffset recordAndOffset : samLocusAndReference.getRecordAndOffsets()) Assert.assertTrue(SequenceUtil.basesEqual(samLocusAndReference.getReferenceBase(), recordAndOffset.getReadBase()), "Record: " + recordAndOffset.getRecord() + " Position:" + samLocusAndReference.getLocus().toString()); } }
private static Integer stratifyHomopolymerLength(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord()); final byte readBases[] = recordAndOffset.getRecord().getReadBases(); if (SequenceUtil.isNoCall(locusInfo.getReferenceBase())) { return null; } int runLengthOffset = recordAndOffset.getOffset(); if (runLengthOffset < 0 || runLengthOffset >= recordAndOffset.getRecord().getReadLength()) { return null; } if (direction == ReadDirection.POSITIVE) { while (--runLengthOffset >= 0) { if (readBases[runLengthOffset] != readBases[recordAndOffset.getOffset() - 1]) { break; } } return recordAndOffset.getOffset() - runLengthOffset - 1; } else { while (++runLengthOffset < recordAndOffset.getRecord().getReadLength()) { if (readBases[runLengthOffset] != readBases[recordAndOffset.getOffset() + 1]) { break; } } return runLengthOffset - recordAndOffset.getOffset() - 1; } }
abstract T stratify(final SAMRecord sam); }
@Override public Integer stratify(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { Integer numberMismatches = recordAndOffset.getRecord().getIntegerAttribute(SAMTag.NM.name()); // Record may not contain an NM tag in which case we cannot stratify over it. if (numberMismatches == null) { return null; } // We are only interested in the number of errors on the read // not including the current base. if (recordAndOffset.getReadBase() != locusInfo.getReferenceBase()) { return numberMismatches - 1; } return numberMismatches; }
abstract T stratify(final SAMRecord sam); }
@Override public Integer stratify(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { Integer numberMismatches = recordAndOffset.getRecord().getIntegerAttribute(SAMTag.NM.name()); // Record may not contain an NM tag in which case we cannot stratify over it. if (numberMismatches == null) { return null; } // We are only interested in the number of errors on the read // not including the current base. if (recordAndOffset.getReadBase() != locusInfo.getReferenceBase()) { return numberMismatches - 1; } return numberMismatches; }
private static Character stratifyReferenceBase(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord()); if (SequenceUtil.isNoCall(locusInfo.getReferenceBase())) { return null; } return stratifySequenceBase(locusInfo.getReferenceBase(), direction == ReadDirection.NEGATIVE); }
private static Character stratifyReadBase(final RecordAndOffset recordAndOffset, int offset) { final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord()); final int requestedOffset = recordAndOffset.getOffset() + offset * (direction == ReadDirection.POSITIVE ? 1 : -1); if (requestedOffset < 0 || requestedOffset >= recordAndOffset.getRecord().getReadLength()) { return null; } else { return stratifySequenceBase(recordAndOffset.getRecord().getReadBases()[requestedOffset], direction == ReadDirection.NEGATIVE); } }
/** * Get the one-based cycle number of the base, taking the direction of the read into account */ private static int stratifyCycle(final RecordAndOffset recordAndOffset) { final SAMRecord rec = recordAndOffset.getRecord(); final int offset = recordAndOffset.getOffset(); // Get either the offset into the array or the distance from the end depending on whether the read is // on the negative strand. int retval = rec.getReadNegativeStrandFlag() ? (rec.getReadLength() - offset - 1) : offset; // add 1 to move to a one-based system retval += 1; return retval; }
@Override public CycleBin stratify(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { final int readCycle = stratifyCycle(recordAndOffset); final double relativePosition = (double) readCycle / recordAndOffset.getRecord().getReadLength(); return CycleBin.valueOf(relativePosition); }
/** * Get the one-based cycle number of the base, taking the direction of the read into account */ private static int stratifyCycle(final RecordAndOffset recordAndOffset) { final SAMRecord rec = recordAndOffset.getRecord(); final int offset = recordAndOffset.getOffset(); // Get either the offset into the array or the distance from the end depending on whether the read is // on the negative strand. int retval = rec.getReadNegativeStrandFlag() ? (rec.getReadLength() - offset - 1) : offset; // add 1 to move to a one-based system retval += 1; return retval; }
@Override public CycleBin stratify(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { final int readCycle = stratifyCycle(recordAndOffset); final double relativePosition = (double) readCycle / recordAndOffset.getRecord().getReadLength(); return CycleBin.valueOf(relativePosition); }
private static Character stratifyReadBase(final RecordAndOffset recordAndOffset, int offset) { final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord()); final int requestedOffset = recordAndOffset.getOffset() + offset * (direction == ReadDirection.POSITIVE ? 1 : -1); if (requestedOffset < 0 || requestedOffset >= recordAndOffset.getRecord().getReadLength()) { return null; } else { return stratifySequenceBase(recordAndOffset.getRecord().getReadBases()[requestedOffset], direction == ReadDirection.NEGATIVE); } }
private static Character stratifyReferenceBase(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) { final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord()); if (SequenceUtil.isNoCall(locusInfo.getReferenceBase())) { return null; } return stratifySequenceBase(locusInfo.getReferenceBase(), direction == ReadDirection.NEGATIVE); }