/** * Prepare to filter out SAMRecords that do not overlap the given list of * intervals * @param intervals */ public IntervalKeepPairFilter(final List<Interval> intervals) { this.intervalOverlapDetector = new OverlapDetector<>(0, 0); this.intervalOverlapDetector.addAll(intervals, intervals); }
/**
 * Factory for an {@code OverlapDetector} with no trim that is pre-loaded with the
 * supplied intervals, each one registered under its own coordinates.
 *
 * @param intervals the intervals to load into the detector
 * @param <T> the kind of {@code Locatable} held by the detector
 * @return a new detector containing every element of {@code intervals}
 */
public static <T extends Locatable> OverlapDetector<T> create(final List<T> intervals) {
    final OverlapDetector<T> result = new OverlapDetector<>(0, 0);
    result.addAll(intervals, intervals);
    return result;
}
/** * Prepare to filter out SAMRecords that do not overlap the given list of * intervals * @param intervals */ public IntervalKeepPairFilter(final List<Interval> intervals) { this.intervalOverlapDetector = new OverlapDetector<>(0, 0); this.intervalOverlapDetector.addAll(intervals, intervals); }
/**
 * Factory for an {@code OverlapDetector} with no trim that is pre-loaded with the
 * supplied intervals, each one registered under its own coordinates.
 *
 * @param intervals the intervals to load into the detector
 * @param <T> the kind of {@code Locatable} held by the detector
 * @return a new detector containing every element of {@code intervals}
 */
public static <T extends Locatable> OverlapDetector<T> create(final List<T> intervals) {
    final OverlapDetector<T> result = new OverlapDetector<>(0, 0);
    result.addAll(intervals, intervals);
    return result;
}
/**
 * Read all the chains and load them into an OverlapDetector.
 *
 * @param chainFile File in UCSC chain format.
 * @return OverlapDetector with all Chains from the file loaded into it, each keyed by its interval.
 * @throws SAMException if the same chain id appears more than once in the file.
 */
static OverlapDetector<Chain> loadChains(final File chainFile) {
    final Set<Integer> ids = new HashSet<Integer>();
    final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile));
    final OverlapDetector<Chain> ret = new OverlapDetector<Chain>(0, 0);
    try {
        Chain chain;
        while ((chain = Chain.loadChain(reader, chainFile.toString())) != null) {
            // Set.add returns false when the id was already recorded.
            if (!ids.add(chain.id)) {
                throw new SAMException("Chain id " + chain.id + " appears more than once in chain file.");
            }
            ret.addLhs(chain, chain.interval);
        }
    } finally {
        // Close the reader even when parsing fails or a duplicate id is rejected.
        reader.close();
    }
    return ret;
}
/**
 * Builds an overlap detector from the intervals stored in an interval-list file.
 * The list read from the file is uniqued before its intervals are loaded, and each
 * interval is registered under its own coordinates.
 *
 * @param intervalList interval-list file to read; may be null
 * @param lhsBuffer    first argument handed to the OverlapDetector constructor
 * @param rhsBuffer    second argument handed to the OverlapDetector constructor
 * @return the populated detector, or null when {@code intervalList} is null
 */
static OverlapDetector<Interval> getOverlapDetectorFromIntervalListFile(File intervalList, int lhsBuffer, int rhsBuffer) {
    if (intervalList != null) {
        final IntervalList uniquedList = IntervalList.fromFile(intervalList).uniqued();
        final List<Interval> uniqueIntervals = uniquedList.getIntervals();

        final OverlapDetector<Interval> result = new OverlapDetector<>(lhsBuffer, rhsBuffer);
        result.addAll(uniqueIntervals, uniqueIntervals);
        return result;
    }
    return null;
}
/**
 * Loads {@code input} into a detector built with (0, 0) and checks that the overlaps
 * reported for {@code query} are equal to {@code expected}.
 */
@Test(dataProvider = "intervalsMultipleContigs")
public void testOverlap(final List<Locatable> input, final Locatable query, final Collection<Locatable> expected) throws Exception {
    final OverlapDetector<Locatable> detector = new OverlapDetector<>(0, 0);
    detector.addAll(input, input);

    final Collection<Locatable> overlaps = detector.getOverlaps(query);
    Assert.assertEquals(overlaps, expected);
}
/**
 * Builds an overlap detector from the intervals stored in an interval-list file.
 * The list read from the file is uniqued before its intervals are loaded, and each
 * interval is registered under its own coordinates.
 *
 * @param intervalList interval-list file to read; may be null
 * @param lhsBuffer    first argument handed to the OverlapDetector constructor
 * @param rhsBuffer    second argument handed to the OverlapDetector constructor
 * @return the populated detector, or null when {@code intervalList} is null
 */
static OverlapDetector<Interval> getOverlapDetectorFromIntervalListFile(File intervalList, int lhsBuffer, int rhsBuffer) {
    if (intervalList != null) {
        final IntervalList uniquedList = IntervalList.fromFile(intervalList).uniqued();
        final List<Interval> uniqueIntervals = uniquedList.getIntervals();

        final OverlapDetector<Interval> result = new OverlapDetector<>(lhsBuffer, rhsBuffer);
        result.addAll(uniqueIntervals, uniqueIntervals);
        return result;
    }
    return null;
}
/**
 * Read all the chains and load them into an OverlapDetector.
 *
 * @param chainFile File in UCSC chain format.
 * @return OverlapDetector with all Chains from the file loaded into it, each keyed by its interval.
 */
static OverlapDetector<Chain> loadChains(final File chainFile) {
    final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile));
    final OverlapDetector<Chain> ret = new OverlapDetector<Chain>(0, 0);
    try {
        Chain chain;
        while ((chain = Chain.loadChain(reader, chainFile.toString())) != null) {
            ret.addLhs(chain, chain.interval);
        }
    } finally {
        // Close the reader even when loading a chain throws part-way through the file.
        reader.close();
    }
    return ret;
}
/**
 * Read all the chains and load them into an OverlapDetector.
 *
 * @param chainFile File in UCSC chain format.
 * @return OverlapDetector with all Chains from the file loaded into it, each keyed by its interval.
 */
static OverlapDetector<Chain> loadChains(final File chainFile) {
    final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile));
    final OverlapDetector<Chain> ret = new OverlapDetector<Chain>(0, 0);
    try {
        Chain chain;
        while ((chain = Chain.loadChain(reader, chainFile.toString())) != null) {
            ret.addLhs(chain, chain.interval);
        }
    } finally {
        // Close the reader even when loading a chain throws part-way through the file.
        reader.close();
    }
    return ret;
}
@Test public void ensureOverlapExclusionTest() { final OverlapDetector<Interval> oneTinyIntervalDetector = new OverlapDetector<Interval>(0, 0); final Interval theInterval = new Interval("1", 5, 10); oneTinyIntervalDetector.addLhs(theInterval, theInterval); final VcfFileSegmentGenerator noFilter = VcfFileSegmentGenerator.byWholeContigSubdividingWithWidth(TEN_MILLION); Assert.assertEquals(Iterables.size(noFilter.forVcf(VCF_WITH_LOGS_OF_GAPS)), 382); // The number of subdivisions of 10 million of this vcf final VcfFileSegmentGenerator allFiltered = VcfFileSegmentGenerator.excludingNonOverlaps(noFilter, oneTinyIntervalDetector); Assert.assertEquals(Iterables.size(allFiltered.forVcf(VCF_WITH_LOGS_OF_GAPS)), 1); } }
@Test public void testNoOverlapsAny() throws Exception { final List<Locatable> input = Arrays.asList( new Interval("1",10,110) ); final OverlapDetector<Locatable> trimmedTargetDetector = new OverlapDetector(20,20); trimmedTargetDetector.addAll(input, input); Assert.assertFalse(trimmedTargetDetector.overlapsAny( new Interval("1",50,85)));//no overlap because of trim Assert.assertTrue(trimmedTargetDetector.getOverlaps( new Interval("1",50,85)).isEmpty());//no overlap because of trim final OverlapDetector<Locatable> untrimmedTargetDetector = new OverlapDetector(0,0); untrimmedTargetDetector.addAll(input, input); Assert.assertTrue(untrimmedTargetDetector.overlapsAny( new Interval("1",50,85)));//overlaps - no trim }
/**
 * Builds the overlap detector for the ribosomal intervals.
 *
 * When no intervals file is given, an empty detector is returned. Otherwise the file is
 * loaded, a warning is logged if it contains no intervals, its sequence dictionary is
 * compared with the one in {@code header}, and the uniqued intervals are added to the detector.
 *
 * @param samFile                SAM file, used only to name the file in the error message
 * @param header                 header whose sequence dictionary the intervals must match
 * @param ribosomalIntervalsFile interval-list file to load; may be null
 * @param log                    destination for the empty-file warning
 * @return a detector holding the uniqued ribosomal intervals, or an empty detector
 * @throws PicardException if the two sequence dictionaries differ
 */
public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile, final Log log) {
    final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0);
    if (ribosomalIntervalsFile == null) {
        return detector;
    }

    final IntervalList loaded = IntervalList.fromFile(ribosomalIntervalsFile);
    if (loaded.size() == 0) {
        log.warn("The RIBOSOMAL_INTERVALS file, " + ribosomalIntervalsFile.getAbsolutePath() + " does not contain intervals");
    }

    try {
        SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), loaded.getHeader().getSequenceDictionary());
    } catch (SequenceUtil.SequenceListsDifferException e) {
        throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(), e);
    }

    final List<Interval> uniqueIntervals = loaded.uniqued().getIntervals();
    detector.addAll(uniqueIntervals, uniqueIntervals);
    return detector;
}
/**
 * Builds the overlap detector for the ribosomal intervals.
 *
 * When no intervals file is given, an empty detector is returned. Otherwise the file is
 * loaded, a warning is logged if it contains no intervals, its sequence dictionary is
 * compared with the one in {@code header}, and the uniqued intervals are added to the detector.
 *
 * @param samFile                SAM file, used only to name the file in the error message
 * @param header                 header whose sequence dictionary the intervals must match
 * @param ribosomalIntervalsFile interval-list file to load; may be null
 * @param log                    destination for the empty-file warning
 * @return a detector holding the uniqued ribosomal intervals, or an empty detector
 * @throws PicardException if the two sequence dictionaries differ
 */
public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile, final Log log) {
    final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0);
    if (ribosomalIntervalsFile == null) {
        return detector;
    }

    final IntervalList loaded = IntervalList.fromFile(ribosomalIntervalsFile);
    if (loaded.size() == 0) {
        log.warn("The RIBOSOMAL_INTERVALS file, " + ribosomalIntervalsFile.getAbsolutePath() + " does not contain intervals");
    }

    try {
        SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), loaded.getHeader().getSequenceDictionary());
    } catch (SequenceUtil.SequenceListsDifferException e) {
        throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(), e);
    }

    final List<Interval> uniqueIntervals = loaded.uniqued().getIntervals();
    detector.addAll(uniqueIntervals, uniqueIntervals);
    return detector;
}
/** Calculates a few statistics about the bait design that can then be output. */ void calculateStatistics(final IntervalList targets, final IntervalList baits) { this.TARGET_TERRITORY = (int) targets.getUniqueBaseCount(); this.TARGET_COUNT = targets.size(); this.BAIT_TERRITORY = (int) baits.getUniqueBaseCount(); this.BAIT_COUNT = baits.size(); this.DESIGN_EFFICIENCY = this.TARGET_TERRITORY / (double) this.BAIT_TERRITORY; // Figure out the intersection between all targets and all baits final IntervalList tmp = new IntervalList(targets.getHeader()); final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0); detector.addAll(baits.getIntervals(), baits.getIntervals()); for (final Interval target : targets) { final Collection<Interval> overlaps = detector.getOverlaps(target); if (overlaps.isEmpty()) { this.ZERO_BAIT_TARGETS++; } else { for (final Interval i : overlaps) tmp.add(target.intersect(i)); } } tmp.uniqued(); this.BAIT_TARGET_TERRITORY_INTERSECTION = (int) tmp.getBaseCount(); }
/** Calculates a few statistics about the bait design that can then be output. */ void calculateStatistics(final IntervalList targets, final IntervalList baits) { this.TARGET_TERRITORY = (int) targets.getUniqueBaseCount(); this.TARGET_COUNT = targets.size(); this.BAIT_TERRITORY = (int) baits.getUniqueBaseCount(); this.BAIT_COUNT = baits.size(); this.DESIGN_EFFICIENCY = this.TARGET_TERRITORY / (double) this.BAIT_TERRITORY; // Figure out the intersection between all targets and all baits final IntervalList tmp = new IntervalList(targets.getHeader()); final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0); detector.addAll(baits.getIntervals(), baits.getIntervals()); for (final Interval target : targets) { final Collection<Interval> overlaps = detector.getOverlaps(target); if (overlaps.isEmpty()) { this.ZERO_BAIT_TARGETS++; } else { for (final Interval i : overlaps) tmp.add(target.intersect(i)); } } tmp.uniqued(); this.BAIT_TARGET_TERRITORY_INTERSECTION = (int) tmp.getBaseCount(); }
/** * A utility function for generating the intersection of two IntervalLists, checks for equal dictionaries. * * @param list1 the first IntervalList * @param list2 the second IntervalList * @return the intersection of list1 and list2. */ public static IntervalList intersection(final IntervalList list1, final IntervalList list2) { final IntervalList result; // Ensure that all the sequence dictionaries agree and merge the lists SequenceUtil.assertSequenceDictionariesEqual(list1.getHeader().getSequenceDictionary(), list2.getHeader().getSequenceDictionary()); result = new IntervalList(list1.getHeader().clone()); final OverlapDetector<Interval> detector = new OverlapDetector<>(0, 0); detector.addAll(list1.getIntervals(), list1.getIntervals()); for (final Interval i : list2.getIntervals()) { final Collection<Interval> as = detector.getOverlaps(i); for (final Interval j : as) { final Interval tmp = i.intersect(j); result.add(tmp); } } return result.uniqued(); }
/** * A utility function for generating the intersection of two IntervalLists, checks for equal dictionaries. * * @param list1 the first IntervalList * @param list2 the second IntervalList * @return the intersection of list1 and list2. */ public static IntervalList intersection(final IntervalList list1, final IntervalList list2) { final IntervalList result; // Ensure that all the sequence dictionaries agree and merge the lists SequenceUtil.assertSequenceDictionariesEqual(list1.getHeader().getSequenceDictionary(), list2.getHeader().getSequenceDictionary()); result = new IntervalList(list1.getHeader().clone()); final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0); detector.addAll(list1.getIntervals(), list1.getIntervals()); for (final Interval i : list2.getIntervals()) { final Collection<Interval> as = detector.getOverlaps(i); for (final Interval j : as) { final Interval tmp = i.intersect(j); result.add(tmp); } } return result.uniqued(); }
@Test public void testSamLocusAndReferenceIterator() { final File reference = new File(TEST_DATA_DIR, "Homo_sapiens_assembly18.trimmed.fasta"); final File samFile = new File(TEST_DATA_DIR, "simpleSmallFile.sam"); final ReferenceSequenceFile referenceSequenceFile = new FastaSequenceFile(reference, false); final ReferenceSequenceFileWalker referenceSequenceFileWalker = new ReferenceSequenceFileWalker(referenceSequenceFile); final SamReader samReader = SamReaderFactory.makeDefault().open(samFile); final SamLocusIterator samLocusIterator = new SamLocusIterator(samReader); final SamLocusAndReferenceIterator samLocusAndReferences = new SamLocusAndReferenceIterator(referenceSequenceFileWalker, samLocusIterator); IntervalList intervalList = new IntervalList(samReader.getFileHeader()); intervalList.add(new Interval("chrM", 1, 36)); intervalList.add(new Interval("chr20", 8401, 8460)); OverlapDetector<Interval> overlapDetector = new OverlapDetector<>(0, 0); overlapDetector.addAll(intervalList.getIntervals(), intervalList.getIntervals()); for (final SamLocusAndReferenceIterator.SAMLocusAndReference samLocusAndReference : samLocusAndReferences) { // The sam file only has coverage in the intervals that are within 'intervalList', and there the coverage should // be exactly 2 since there are two overlapping, paired reads. This is what this test is testing: Assert.assertEquals(samLocusAndReference.getRecordAndOffsets().size(), overlapDetector.overlapsAny(samLocusAndReference.getLocus()) ? 2 : 0, "Position:" + samLocusAndReference.getLocus().toString()); // all the reads are equal to the reference...this is what this test is testing. for (final SamLocusIterator.RecordAndOffset recordAndOffset : samLocusAndReference.getRecordAndOffsets()) Assert.assertTrue(SequenceUtil.basesEqual(samLocusAndReference.getReferenceBase(), recordAndOffset.getReadBase()), "Record: " + recordAndOffset.getRecord() + " Position:" + samLocusAndReference.getLocus().toString()); } }
/** * A utility function for generating the intersection of two IntervalLists, checks for equal dictionaries. * * @param list1 the first IntervalList * @param list2 the second IntervalList * @return the intersection of list1 and list2. */ public static IntervalList intersection(final IntervalList list1, final IntervalList list2) { final IntervalList result; // Ensure that all the sequence dictionaries agree and merge the lists SequenceUtil.assertSequenceDictionariesEqual(list1.getHeader().getSequenceDictionary(), list2.getHeader().getSequenceDictionary()); result = new IntervalList(list1.getHeader().clone()); final OverlapDetector<Interval> detector = new OverlapDetector<>(0, 0); detector.addAll(list1.getIntervals(), list1.getIntervals()); for (final Interval i : list2.getIntervals()) { final Collection<Interval> as = detector.getOverlaps(i); for (final Interval j : as) { final Interval tmp = i.intersect(j); result.add(tmp); } } return result.uniqued(); }