/**
 * Replaces {@code refSeq} with a newly constructed walker over {@code referenceFasta}.
 * <p>
 * The walker that was previously stored in {@code refSeq} (if any) is simply dropped;
 * this method does not close it.
 */
protected void resetRefSeqFileWalker() {
    final ReferenceSequenceFileWalker freshWalker = new ReferenceSequenceFileWalker(referenceFasta);
    this.refSeq = freshWalker;
}
/**
 * Replaces {@code refSeq} with a newly constructed walker over {@code referenceFasta}.
 * <p>
 * The walker that was previously stored in {@code refSeq} (if any) is simply dropped;
 * this method does not close it.
 */
protected void resetRefSeqFileWalker() {
    final ReferenceSequenceFileWalker freshWalker = new ReferenceSequenceFileWalker(referenceFasta);
    this.refSeq = freshWalker;
}
/**
 * Builds a reference walker from a textual reference description.
 *
 * @param referenceString text handed unchanged to {@code createReferenceSequenceFile}
 * @return a new walker wrapping the {@code ReferenceSequenceFile} created from that text
 */
static ReferenceSequenceFileWalker getReferenceSequenceFileWalker(String referenceString) {
    return new ReferenceSequenceFileWalker(createReferenceSequenceFile(referenceString));
}
/**
 * Options for what to do with a read's mapping information when the read is unmapped.
 * <p>
 * Each constant is a pair of flags: whether the mapping information is reset on the
 * record, and whether the PA tag is populated.
 */
public enum UnmappingReadStrategy {
    /** Leave on record, and copy to tag. */
    COPY_TO_TAG(false, true),

    /** Leave on record, but do not create additional tag. */
    DO_NOT_CHANGE(false, false),

    /** Add tag with information, and remove from standard fields in record. */
    MOVE_TO_TAG(true, true);

    private final boolean resetMappingInformation;
    private final boolean populatePATag;

    /**
     * @param resetMappingInformation value later returned by {@link #isResetMappingInformation()}
     * @param populatePATag           value later returned by {@link #isPopulatePaTag()}
     */
    UnmappingReadStrategy(final boolean resetMappingInformation, final boolean populatePATag) {
        this.resetMappingInformation = resetMappingInformation;
        this.populatePATag = populatePATag;
    }

    /** @return the {@code resetMappingInformation} flag this constant was declared with */
    public boolean isResetMappingInformation() {
        return resetMappingInformation;
    }

    /** @return the {@code populatePATag} flag this constant was declared with */
    public boolean isPopulatePaTag() {
        return populatePATag;
    }
}
/**
 * Chooses the pair-end bookkeeping map from the header's sort order and, when a
 * reference is supplied, creates a walker over it.
 *
 * @param reference reference sequence file, or {@code null} to leave {@code refFileWalker} untouched
 * @param header    header whose sort order selects the {@code pairEndInfoByName} implementation
 */
private void init(final ReferenceSequenceFile reference, final SAMFileHeader header) {
    final boolean coordinateSorted = header.getSortOrder() == SAMFileHeader.SortOrder.coordinate;
    this.pairEndInfoByName = coordinateSorted
            ? new CoordinateSortedPairEndInfoMap()
            : new InMemoryPairEndInfoMap();

    if (reference == null) {
        return;
    }
    this.refFileWalker = new ReferenceSequenceFileWalker(reference);
}
/**
 * Options for what to do with a read's mapping information when the read is unmapped.
 * <p>
 * Each constant is a pair of flags: whether the mapping information is reset on the
 * record, and whether the PA tag is populated.
 */
public enum UnmappingReadStrategy {
    /** Leave on record, and copy to tag. */
    COPY_TO_TAG(false, true),

    /** Leave on record, but do not create additional tag. */
    DO_NOT_CHANGE(false, false),

    /** Add tag with information, and remove from standard fields in record. */
    MOVE_TO_TAG(true, true);

    private final boolean resetMappingInformation;
    private final boolean populatePATag;

    /**
     * @param resetMappingInformation value later returned by {@link #isResetMappingInformation()}
     * @param populatePATag           value later returned by {@link #isPopulatePaTag()}
     */
    UnmappingReadStrategy(final boolean resetMappingInformation, final boolean populatePATag) {
        this.resetMappingInformation = resetMappingInformation;
        this.populatePATag = populatePATag;
    }

    /** @return the {@code resetMappingInformation} flag this constant was declared with */
    public boolean isResetMappingInformation() {
        return resetMappingInformation;
    }

    /** @return the {@code populatePATag} flag this constant was declared with */
    public boolean isPopulatePaTag() {
        return populatePATag;
    }
}
/**
 * Chooses the pair-end bookkeeping map from the header's sort order and, when a
 * reference is supplied, creates a walker over it and records its sequence dictionary.
 *
 * @param reference reference sequence file, or {@code null} to leave {@code refFileWalker}
 *                  and {@code samSequenceDictionary} untouched
 * @param header    header whose sort order selects the {@code pairEndInfoByName} implementation
 */
private void init(final ReferenceSequenceFile reference, final SAMFileHeader header) {
    final boolean coordinateSorted = header.getSortOrder() == SAMFileHeader.SortOrder.coordinate;
    this.pairEndInfoByName = coordinateSorted
            ? new CoordinateSortedPairEndInfoMap()
            : new InMemoryPairEndInfoMap();

    if (reference == null) {
        return;
    }
    this.refFileWalker = new ReferenceSequenceFileWalker(reference);
    this.samSequenceDictionary = reference.getSequenceDictionary();
}
/**
 * Chooses the pair-end bookkeeping map from the header's sort order and, when a
 * reference is supplied, creates a walker over it and records its sequence dictionary.
 *
 * @param reference reference sequence file, or {@code null} to leave {@code refFileWalker}
 *                  and {@code samSequenceDictionary} untouched
 * @param header    header whose sort order selects the {@code pairEndInfoByName} implementation
 */
private void init(final ReferenceSequenceFile reference, final SAMFileHeader header) {
    final boolean coordinateSorted = header.getSortOrder() == SAMFileHeader.SortOrder.coordinate;
    this.pairEndInfoByName = coordinateSorted
            ? new CoordinateSortedPairEndInfoMap()
            : new InMemoryPairEndInfoMap();

    if (reference == null) {
        return;
    }
    this.refFileWalker = new ReferenceSequenceFileWalker(reference);
    this.samSequenceDictionary = reference.getSequenceDictionary();
}
/**
 * Requests {@code index1} and then {@code index2} from a walker opened on {@code fileName};
 * the test passes only if a {@link SAMException} is thrown along the way.
 * The walker is closed whether or not the exception occurs.
 */
@Test(expectedExceptions = {SAMException.class}, dataProvider = "TestFailReference")
public void testFailGet(final String fileName, final int index1, final int index2) throws SAMException {
    final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(Paths.get(fileName));
    try {
        // Same two lookups, in the same order, as two consecutive get() calls.
        for (final int contigIndex : new int[]{index1, index2}) {
            walker.get(contigIndex);
        }
    } finally {
        CloserUtil.close(walker);
    }
}
/**
 * Opens a walker on {@code fileName} through the {@link File} constructor and checks that
 * fetching {@code index1} and then {@code index2} returns sequences carrying those contig indices.
 *
 * @param fileName path of the reference to open
 * @param index1   first contig index to fetch
 * @param index2   second contig index to fetch
 */
@Test(dataProvider = "TestReference")
public void testGetFile(final String fileName, final int index1, final int index2) throws SAMException {
    final File refFile = new File(fileName);
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refFile);
    try {
        ReferenceSequence sequence = refWalker.get(index1);
        Assert.assertEquals(sequence.getContigIndex(), index1);
        sequence = refWalker.get(index2);
        Assert.assertEquals(sequence.getContigIndex(), index2);
    } finally {
        // Close even when a lookup or an assertion fails, so a failing test does not leak the walker.
        CloserUtil.close(refWalker);
    }
}
/**
 * Opens a walker on {@code fileName} through the {@link Path} constructor and checks that
 * fetching {@code index1} and then {@code index2} returns sequences carrying those contig indices.
 *
 * @param fileName path of the reference to open
 * @param index1   first contig index to fetch
 * @param index2   second contig index to fetch
 */
@Test(dataProvider = "TestReference")
public void testGet(final String fileName, final int index1, final int index2) throws SAMException {
    final Path refPath = Paths.get(fileName);
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(refPath);
    try {
        ReferenceSequence sequence = refWalker.get(index1);
        Assert.assertEquals(sequence.getContigIndex(), index1);
        sequence = refWalker.get(index2);
        Assert.assertEquals(sequence.getContigIndex(), index2);
    } finally {
        // Close even when a lookup or an assertion fails, so a failing test does not leak the walker.
        CloserUtil.close(refWalker);
    }
}
/**
 * Pairs a locus iterator over {@code simpleSmallFile.sam} with a walker over
 * {@code reference_with_trailing_whitespace.fasta}; the test passes only if an
 * {@link IllegalArgumentException} is thrown.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testSamLocusAndReferenceIteratorMismatch() {
    final File fastaFile = new File(TEST_DATA_DIR, "reference_with_trailing_whitespace.fasta");
    final File alignmentFile = new File(TEST_DATA_DIR, "simpleSmallFile.sam");

    final ReferenceSequenceFileWalker walker =
            new ReferenceSequenceFileWalker(new FastaSequenceFile(fastaFile, false));
    final SamLocusIterator loci =
            new SamLocusIterator(SamReaderFactory.makeDefault().open(alignmentFile));

    // Only the construction matters; the resulting object is never used.
    new SamLocusAndReferenceIterator(walker, loci);
}
// NOTE(review): the brace below was already on this line in the original; presumably it closes the enclosing class.
}
/**
 * Feeds every (record, offset) pair at every locus of {@code temp} into an
 * overlapping-reads error aggregation, walking the chrM test reference alongside.
 * The test declares a 5000 ms timeout and makes no assertions of its own.
 *
 * @param temp alignment file supplied by the data provider
 */
@Test(dataProvider = "testOverlappingErrorCalculatorWithManyReadsData", timeOut = 5000)
public void testOverlappingErrorCalculatorWithManyReads(final File temp) throws IOException {
    final File chrMReference = new File("testdata/picard/sam/BamErrorMetrics/chrM.reference.fasta");

    try (final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(chrMReference);
         final SamLocusIterator loci = new SamLocusIterator(SamReaderFactory.make().open(temp));
         final SamLocusAndReferenceIterator lociWithReference = new SamLocusAndReferenceIterator(walker, loci)) {

        final BaseErrorAggregation<OverlappingReadsErrorCalculator> aggregation =
                new BaseErrorAggregation<>(OverlappingReadsErrorCalculator::new, ReadBaseStratification.baseCycleStratifier);

        for (final SamLocusAndReferenceIterator.SAMLocusAndReference locus : lociWithReference) {
            locus.getRecordAndOffsets().forEach(recordAndOffset -> aggregation.addBase(recordAndOffset, locus));
        }
    }
}
// NOTE(review): the brace below was already on this line in the original; presumably it closes the enclosing class.
}
/**
 * Processes {@code exampleSamOneRead} against a reference of ten {@code N}s followed by
 * ten called bases and expects the collector's {@code counter} to end at 10.
 */
@Test
public void testForFilteredBases() {
    final AbstractLocusIterator locusIterator = createReadEndsIterator(exampleSamOneRead);
    final FastWgsMetricsCollector collector =
            new FastWgsMetricsCollector(new CollectWgsMetrics(), 100, createIntervalList());

    final String secondReferenceString = ">ref\nNNNNNNNNNNAATATTCTTC";
    final ReferenceSequenceFileWalker walker =
            new ReferenceSequenceFileWalker(createReferenceSequenceFile(secondReferenceString));

    new WgsMetricsProcessorImpl(locusIterator, walker, collector, progress).processFile();

    assertEquals(10, collector.counter);
}
/**
 * Reads every record of {@code INPUT}, passes it through {@code fixRecord} together with a
 * walker over {@code REFERENCE_SEQUENCE}, and writes it to {@code OUTPUT}.
 *
 * @return 0 when all records have been written
 * @throws SAMException if the input header does not declare coordinate sort order
 */
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    try {
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new SAMException("Input must be coordinate-sorted for this program to run. Found: " + reader.getFileHeader().getSortOrder());
        }

        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
        writer.setProgressLogger(
                new ProgressLogger(log, (int) 1e7, "Wrote", "records"));

        final ReferenceSequenceFileWalker refSeqWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
        try {
            // Fix then write inside one terminal operation; peek() is not meant to carry required side effects.
            StreamSupport.stream(reader.spliterator(), false)
                    .forEach(rec -> {
                        fixRecord(rec, refSeqWalker);
                        writer.addAlignment(rec);
                    });
        } finally {
            // The walker was never closed before; release it on success and on failure alike.
            CloserUtil.close(refSeqWalker);
        }

        // Closed explicitly, and only on the success path, so that a failure while
        // finishing the output still propagates to the caller.
        writer.close();
    } finally {
        // Also covers the sort-order check above, which used to leak the reader when it threw.
        CloserUtil.close(reader);
    }
    return 0;
}
/**
 * Reads every record of {@code INPUT}, passes it through {@code fixRecord} together with a
 * walker over {@code REFERENCE_SEQUENCE}, and writes it to {@code OUTPUT}.
 *
 * @return 0 when all records have been written
 * @throws SAMException if the input header does not declare coordinate sort order
 */
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    try {
        if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            throw new SAMException("Input must be coordinate-sorted for this program to run. Found: " + reader.getFileHeader().getSortOrder());
        }

        final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
        writer.setProgressLogger(
                new ProgressLogger(log, (int) 1e7, "Wrote", "records"));

        final ReferenceSequenceFileWalker refSeqWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
        try {
            // Fix then write inside one terminal operation; peek() is not meant to carry required side effects.
            StreamSupport.stream(reader.spliterator(), false)
                    .forEach(rec -> {
                        fixRecord(rec, refSeqWalker);
                        writer.addAlignment(rec);
                    });
        } finally {
            // The walker was never closed before; release it on success and on failure alike.
            CloserUtil.close(refSeqWalker);
        }

        // Closed explicitly, and only on the success path, so that a failure while
        // finishing the output still propagates to the caller.
        writer.close();
    } finally {
        // Also covers the sort-order check above, which used to leak the reader when it threw.
        CloserUtil.close(reader);
    }
    return 0;
}
/**
 * Writes a temporary BAM holding 4000 read pairs, all added with the same arguments apart
 * from the read name, and using the sequence dictionary of the chrM test reference.
 *
 * @return a single data row containing the temporary BAM file
 * @throws IOException if the temporary file cannot be created or the reference walker fails to close
 */
@DataProvider
public Object[][] testOverlappingErrorCalculatorWithManyReadsData() throws IOException {
    final File temp = File.createTempFile("Overlapping", ".bam");
    temp.deleteOnExit();

    final File chrMReference = new File("testdata/picard/sam/BamErrorMetrics/chrM.reference.fasta");
    try (final ReferenceSequenceFileWalker walker = new ReferenceSequenceFileWalker(chrMReference)) {
        final SAMRecordSetBuilder recordSet = new SAMRecordSetBuilder();
        recordSet.getHeader().setSequenceDictionary(walker.getSequenceDictionary());

        final int pairCount = 4000;
        for (int pair = 0; pair < pairCount; pair++) {
            recordSet.addPair("Read" + pair, 0, 1, 1, false, false, "36M", "36M", true, false, 20);
        }

        final SAMFileWriterFactory writerFactory = new SAMFileWriterFactory().setCompressionLevel(2);
        try (final SAMFileWriter bamWriter = writerFactory.makeBAMWriter(recordSet.getHeader(), false, temp)) {
            recordSet.forEach(bamWriter::addAlignment);
        }
    }

    return new Object[][]{{temp}};
}
// Create a walker over REFERENCE_SEQUENCE and a ProgressLogger built on `log`.
// NOTE(review): the enclosing method is not visible here — confirm where refWalker gets closed.
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE); final ProgressLogger progressLogger = new ProgressLogger(log);
// Create a walker over REFERENCE_SEQUENCE and a ProgressLogger built on `log`.
// NOTE(review): the enclosing method is not visible here — confirm where refWalker gets closed.
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE); final ProgressLogger progressLogger = new ProgressLogger(log);
@Test public void testSamLocusAndReferenceIterator() { final File reference = new File(TEST_DATA_DIR, "Homo_sapiens_assembly18.trimmed.fasta"); final File samFile = new File(TEST_DATA_DIR, "simpleSmallFile.sam"); final ReferenceSequenceFile referenceSequenceFile = new FastaSequenceFile(reference, false); final ReferenceSequenceFileWalker referenceSequenceFileWalker = new ReferenceSequenceFileWalker(referenceSequenceFile); final SamReader samReader = SamReaderFactory.makeDefault().open(samFile); final SamLocusIterator samLocusIterator = new SamLocusIterator(samReader); final SamLocusAndReferenceIterator samLocusAndReferences = new SamLocusAndReferenceIterator(referenceSequenceFileWalker, samLocusIterator); IntervalList intervalList = new IntervalList(samReader.getFileHeader()); intervalList.add(new Interval("chrM", 1, 36)); intervalList.add(new Interval("chr20", 8401, 8460)); OverlapDetector<Interval> overlapDetector = new OverlapDetector<>(0, 0); overlapDetector.addAll(intervalList.getIntervals(), intervalList.getIntervals()); for (final SamLocusAndReferenceIterator.SAMLocusAndReference samLocusAndReference : samLocusAndReferences) { // The sam file only has coverage in the intervals that are within 'intervalList', and there the coverage should // be exactly 2 since there are two overlapping, paired reads. This is what this test is testing: Assert.assertEquals(samLocusAndReference.getRecordAndOffsets().size(), overlapDetector.overlapsAny(samLocusAndReference.getLocus()) ? 2 : 0, "Position:" + samLocusAndReference.getLocus().toString()); // all the reads are equal to the reference...this is what this test is testing. for (final SamLocusIterator.RecordAndOffset recordAndOffset : samLocusAndReference.getRecordAndOffsets()) Assert.assertTrue(SequenceUtil.basesEqual(samLocusAndReference.getReferenceBase(), recordAndOffset.getReadBase()), "Record: " + recordAndOffset.getRecord() + " Position:" + samLocusAndReference.getLocus().toString()); } }