/** Calculates a few statistics about the bait design that can then be output. */ void calculateStatistics(final IntervalList targets, final IntervalList baits) { this.TARGET_TERRITORY = (int) targets.getUniqueBaseCount(); this.TARGET_COUNT = targets.size(); this.BAIT_TERRITORY = (int) baits.getUniqueBaseCount(); this.BAIT_COUNT = baits.size(); this.DESIGN_EFFICIENCY = this.TARGET_TERRITORY / (double) this.BAIT_TERRITORY; // Figure out the intersection between all targets and all baits final IntervalList tmp = new IntervalList(targets.getHeader()); final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0); detector.addAll(baits.getIntervals(), baits.getIntervals()); for (final Interval target : targets) { final Collection<Interval> overlaps = detector.getOverlaps(target); if (overlaps.isEmpty()) { this.ZERO_BAIT_TARGETS++; } else { for (final Interval i : overlaps) tmp.add(target.intersect(i)); } } tmp.uniqued(); this.BAIT_TARGET_TERRITORY_INTERSECTION = (int) tmp.getBaseCount(); }
/**
 * Builds an {@link OverlapDetector} from the intervals in an interval-list file.
 * The intervals are uniqued before being loaded into the detector.
 *
 * @param intervalList the interval-list file to read, or null
 * @param lhsBuffer    buffer to apply to the left-hand side of each interval
 * @param rhsBuffer    buffer to apply to the right-hand side of each interval
 * @return an overlap detector over the uniqued intervals, or null if the file was null
 */
static OverlapDetector<Interval> getOverlapDetectorFromIntervalListFile(File intervalList, int lhsBuffer, int rhsBuffer) {
    if (intervalList == null) {
        return null;
    }
    final List<Interval> uniquedIntervals = IntervalList.fromFile(intervalList).uniqued().getIntervals();
    final OverlapDetector<Interval> overlapDetector = new OverlapDetector<>(lhsBuffer, rhsBuffer);
    overlapDetector.addAll(uniquedIntervals, uniquedIntervals);
    return overlapDetector;
}
/**
 * A utility function for finding the union of a list of IntervalLists, checks for equal dictionaries.
 * also looks for overlapping intervals, uniquifies, and sorts (by coordinate)
 *
 * @param lists the list of IntervalList
 * @return the union of all the IntervalLists in lists.
 */
public static IntervalList union(final Collection<IntervalList> lists) {
    // Concatenate everything first, then collapse overlaps and sort via uniqued().
    return concatenate(lists).uniqued();
}
/**
 * Gets the count of unique bases represented by the intervals in the list.
 */
public long getUniqueBaseCount() {
    // uniqued() collapses overlapping intervals, so its base count has no double-counting.
    final IntervalList collapsed = uniqued();
    return collapsed.getBaseCount();
}
/**
 * A utility function for subtracting a collection of IntervalLists from another. Resulting loci are those that are in the first collection
 * but not the second.
 *
 * @param listsToSubtractFrom the collection of IntervalList from which to subtract intervals
 * @param listsToSubtract the collection of intervals to subtract
 * @return an IntervalLists comprising all loci that are in first collection but not second.
 */
public static IntervalList subtract(final Collection<IntervalList> listsToSubtractFrom,
                                    final Collection<IntervalList> listsToSubtract) {
    // A - B == A ∩ complement(B)
    final IntervalList minuend = union(listsToSubtractFrom);
    final IntervalList subtrahendComplement = invert(union(listsToSubtract));
    return intersection(minuend, subtrahendComplement);
}
final IntervalList possiblySortedResult = SORT ? result.sorted() : result; final IntervalList possiblyInvertedResult = INVERT ? IntervalList.invert(possiblySortedResult) : possiblySortedResult; List<Interval> finalIntervals = UNIQUE ? possiblyInvertedResult.uniqued().getIntervals() : possiblyInvertedResult.getIntervals(); finalIntervals = IntervalList.breakIntervalsAtBandMultiples(finalIntervals, BREAK_BANDS_AT_MULTIPLES_OF); final SAMFileHeader header = result.getHeader(); final Set<String> pgs = new HashSet<>(); for (final SAMProgramRecord pg : header.getProgramRecords()) pgs.add(pg.getId()); final IntervalList output = new IntervalList(header); for (final Interval i : finalIntervals) { output.add(i); output.write(OUTPUT); resultIntervals = Collections.singletonList(output); } else { long intervalCount = 0; for (final IntervalList finalInterval : resultIntervals) { totalBaseCount += finalInterval.getBaseCount(); intervalCount += finalInterval.size();
final IntervalList intervalList = IntervalList.fromFile(INPUT); final IntervalList rejects = new IntervalList(intervalList.getHeader()); final long baseCount = intervalList.getBaseCount(); LOG.info("Lifting over " + intervalList.getIntervals().size() + " intervals, encompassing " + baseCount + " bases."); final IntervalList toIntervals = new IntervalList(toHeader); for (final Interval fromInterval : intervalList) { final Interval toInterval = liftOver.liftOver(fromInterval); if (toInterval != null) { toIntervals.add(toInterval); } else { rejects.add(fromInterval); LOG.warn("Liftover failed for ", fromInterval, " (len ", fromInterval.length(), ")"); final List<LiftOver.PartialLiftover> partials = liftOver.diagnosticLiftover(fromInterval); toIntervals.sorted().write(OUTPUT); rejects.write(REJECT); final long rejectBaseCount = rejects.getBaseCount(); rejects.getIntervals().size(), intervalList.getIntervals().size(), 100 * rejects.getIntervals().size() / (double) intervalList.getIntervals().size(), rejectBaseCount, baseCount, 100 * rejectBaseCount / (double) baseCount ));
final SAMFileHeader header = lists1.iterator().next().getHeader().clone(); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); final IntervalList overlapIntervals = new IntervalList(header); for (final IntervalList list : lists2) { SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), list.getHeader().getSequenceDictionary()); overlapIntervals.addall(list.getIntervals()); for (final Interval interval : overlapIntervals.sorted().uniqued()) { detector.addLhs(dummy, interval); final IntervalList merged = new IntervalList(header); for (final IntervalList list : lists1) { SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), list.getHeader().getSequenceDictionary()); for (final Interval interval : list.getIntervals()) { if (detector.overlapsAny(interval)) { merged.add(interval);
/**
 * Creates an IntervalList from the given sequence name
 *
 * @param header header to use to create IntervalList
 * @param sequenceName name of sequence in header
 * @return a new intervalList with given header that contains the reference name
 * @throws IllegalArgumentException if sequenceName is not present in the header's sequence dictionary
 */
public static IntervalList fromName(final SAMFileHeader header, final String sequenceName) {
    final SAMSequenceRecord sequence = header.getSequence(sequenceName);
    // Fail with a diagnostic message instead of the bare NPE the previous code
    // produced when the sequence name was absent from the dictionary.
    if (sequence == null) {
        throw new IllegalArgumentException("Sequence '" + sequenceName + "' was not found in the header's sequence dictionary.");
    }
    final IntervalList ref = new IntervalList(header);
    ref.add(new Interval(sequenceName, 1, sequence.getSequenceLength()));
    return ref;
}
@DataProvider public static Iterator<Object[]> testScatterTestcases() { final List<Testcase> testCases = new ArrayList<>(); Assert.assertEquals(LIST_TO_SCATTER.getUniqueBaseCount(), 200, "Wrong unique base count"); Assert.assertEquals(LIST_TO_SCATTER_MANY.getUniqueBaseCount(), 32 * 2, "Wrong unique base count"); Assert.assertEquals(LIST_TO_SCATTER_MANY.getIntervals().size(), 32, "Wrong unique interval count"); final IntervalList full = new IntervalList(LIST_TO_SCATTER_MANY.getHeader()); full.add(new Interval("1", 30000, 30000 + 32 * 2 - 1)); Collections.singletonList(IntervalList.overlaps(LIST_TO_SCATTER_MANY, full)))); final IntervalList half = new IntervalList(LIST_TO_SCATTER_MANY.getHeader()); half.add(new Interval("1", 30000, 30000 + 16 * 2 - 1)); testCases.add(new Testcase( SCATTER_MANY_INTERVAL_FILE, 2, IntervalListScatterMode.INTERVAL_COUNT, Arrays.asList(IntervalList.overlaps(LIST_TO_SCATTER_MANY, half), IntervalList.overlaps(LIST_TO_SCATTER_MANY, IntervalList.invert(half))))); final IntervalList third = new IntervalList(LIST_TO_SCATTER_MANY.getHeader()); third.add(new Interval("1", 30000, 30000 + 10 * 2 - 1)); final IntervalList secondThird = new IntervalList(LIST_TO_SCATTER_MANY.getHeader()); secondThird.add(new Interval("1", 30000 + 10 * 2, 30000 + 20 * 2 - 1)); testCases.add(new Testcase( SCATTER_MANY_INTERVAL_FILE, 3, IntervalListScatterMode.INTERVAL_COUNT, Arrays.asList(IntervalList.overlaps(LIST_TO_SCATTER_MANY, third), IntervalList.overlaps(LIST_TO_SCATTER_MANY, secondThird), IntervalList.overlaps(LIST_TO_SCATTER_MANY, IntervalList.invert(IntervalList.concatenate(Arrays.asList(
IntervalList runningIntervalList = new IntervalList(processedIntervalList.getHeader()); final ArrayDeque<Interval> intervalQueue = new ArrayDeque<>(processedIntervalList.getIntervals()); runningIntervalList = new IntervalList(processedIntervalList.getHeader()); } else { runningIntervalList.add(split.get(0)); runningIntervalList.addall(intervalQueue); if (!runningIntervalList.getIntervals().isEmpty()) { accumulatedIntervalLists.add(runningIntervalList);
for (final File f : INTERVALS) { IOUtil.assertFileIsReadable(f); final IntervalList tmpIntervalList = IntervalList.fromFile(f); if (genomeBaseCount == 0) { // Don't count the reference length more than once. intervalsSamSequenceDictionary = tmpIntervalList.getHeader().getSequenceDictionary(); genomeBaseCount = intervalsSamSequenceDictionary.getReferenceLength(); else if (INTERSECT_INTERVALS) intervals = IntervalList.intersection(intervals, tmpIntervalList); else intervals = IntervalList.union(intervals, tmpIntervalList); intervals = intervals.uniqued(); final long baseCount = (intervals != null) ? intervals.getBaseCount() : truthReader.getFileHeader().getSequenceDictionary().getReferenceLength(); addMissingTruthAndMissingCallStates(snpCounter.getCounterSize(), baseCount, snpCounter); addMissingTruthAndMissingCallStates(indelCounter.getCounterSize(), baseCount, indelCounter);
@Test(dataProvider = "invertData")
public void testInvertSquared(final IntervalList list,
                              @SuppressWarnings("UnusedParameters") final IntervalList ignored) throws Exception {
    // Inverting twice should yield the uniqued form of the original list.
    final IntervalList inverseSquared = IntervalList.invert(IntervalList.invert(list));

    // Build an independent copy so the comparison cannot be affected by shared state.
    final IntervalList originalClone = new IntervalList(list.getHeader());
    list.forEach(originalClone::add);

    Assert.assertEquals(
            CollectionUtil.makeCollection(inverseSquared.iterator()),
            CollectionUtil.makeCollection(originalClone.uniqued().iterator()));
}
/**
 * Returns a new IntervalList where each interval is padded by the specified amount of bases.
 *
 * @param before number of bases to pad before each interval's start (clamped at position 1)
 * @param after  number of bases to pad after each interval's end (clamped at the sequence length)
 * @return a new, padded IntervalList; this list is not modified
 * @throws IllegalArgumentException if a padding value is negative, or if an interval's contig
 *                                  is not present in the sequence dictionary
 */
public IntervalList padded(final int before, final int after) {
    if (before < 0 || after < 0) {
        throw new IllegalArgumentException("Padding values must be >= 0.");
    }
    final IntervalList padded = new IntervalList(this.getHeader().clone());
    final SAMSequenceDictionary dict = padded.getHeader().getSequenceDictionary();
    for (final Interval i : this) {
        final SAMSequenceRecord seq = dict.getSequence(i.getContig());
        // Previously a missing contig caused a bare NPE on seq.getSequenceLength();
        // fail with a message that names the offending contig instead.
        if (seq == null) {
            throw new IllegalArgumentException("Contig '" + i.getContig() + "' is not present in the sequence dictionary.");
        }
        // Clamp the padded coordinates to the valid range of the reference sequence.
        final int start = Math.max(1, i.getStart() - before);
        final int end = Math.min(seq.getSequenceLength(), i.getEnd() + after);
        padded.add(new Interval(i.getContig(), start, end, i.isNegativeStrand(), i.getName()));
    }
    return padded;
}
/**
 * Builds an overlap detector for ribosomal sequence intervals.
 *
 * @param samFile                the SAM/BAM file being processed (used only for error messages)
 * @param header                 header whose sequence dictionary the intervals must match
 * @param ribosomalIntervalsFile interval-list file of ribosomal regions, or null for none
 * @param log                    log to warn on an empty interval file
 * @return a detector over the uniqued ribosomal intervals; empty if the file was null
 */
public static OverlapDetector<Interval> makeOverlapDetector(final File samFile,
                                                            final SAMFileHeader header,
                                                            final File ribosomalIntervalsFile,
                                                            final Log log) {
    final OverlapDetector<Interval> detector = new OverlapDetector<>(0, 0);
    if (ribosomalIntervalsFile == null) {
        return detector;
    }

    final IntervalList ribosomalIntervals = IntervalList.fromFile(ribosomalIntervalsFile);
    if (ribosomalIntervals.size() == 0) {
        log.warn("The RIBOSOMAL_INTERVALS file, " + ribosomalIntervalsFile.getAbsolutePath() + " does not contain intervals");
    }

    // The interval dictionary must match the input file's, otherwise coordinates are meaningless.
    try {
        SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(),
                ribosomalIntervals.getHeader().getSequenceDictionary());
    } catch (SequenceUtil.SequenceListsDifferException e) {
        throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath()
                + " and " + ribosomalIntervalsFile.getAbsolutePath(), e);
    }

    final List<Interval> uniqueIntervals = ribosomalIntervals.uniqued().getIntervals();
    detector.addAll(uniqueIntervals, uniqueIntervals);
    return detector;
}
/**
 * Gets the intervals over which we will calculate metrics.
 * If INTERVALS was supplied it is read from file; otherwise one interval
 * spanning each sequence in the header's dictionary is used.
 */
protected IntervalList getIntervalsToExamine() {
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
        return IntervalList.fromFile(INTERVALS);
    }
    // No explicit intervals: cover every reference sequence end-to-end.
    final IntervalList wholeGenome = new IntervalList(this.header);
    for (final SAMSequenceRecord rec : this.header.getSequenceDictionary().getSequences()) {
        wholeGenome.add(new Interval(rec.getSequenceName(), 1, rec.getSequenceLength()));
    }
    return wholeGenome;
}
/** * A utility function for merging a list of IntervalLists, checks for equal dictionaries. * Merging does not look for overlapping intervals nor uniquify * * @param lists a list of IntervalList * @return the union of all the IntervalLists in lists. */ public static IntervalList concatenate(final Collection<IntervalList> lists) { if (lists.isEmpty()) { throw new SAMException("Cannot concatenate an empty list of IntervalLists."); } // Ensure that all the sequence dictionaries agree and merge the lists final SAMFileHeader header = lists.iterator().next().getHeader().clone(); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); final IntervalList merged = new IntervalList(header); for (final IntervalList in : lists) { SequenceUtil.assertSequenceDictionariesEqual(merged.getHeader().getSequenceDictionary(), in.getHeader().getSequenceDictionary()); merged.addall(in.intervals); } return merged; }
@DataProvider(name = "overlapsSingletonData") public Object[][] overlapSingletonData() { final IntervalList two_overlaps_one = new IntervalList(fileHeader); final IntervalList three_overlaps_two = new IntervalList(fileHeader); final IntervalList three_overlaps_one = new IntervalList(fileHeader); final IntervalList one_overlaps_three = new IntervalList(fileHeader); two_overlaps_one.add(new Interval("1", 50, 150)); two_overlaps_one.add(new Interval("2", 1, 150)); two_overlaps_one.add(new Interval("2", 250, 270)); two_overlaps_one.add(new Interval("2", 290, 400)); three_overlaps_two.add(new Interval("1", 25, 400)); three_overlaps_two.add(new Interval("2", 200, 600)); three_overlaps_one.add(new Interval("1", 25, 400)); three_overlaps_one.add(new Interval("2", 200, 600)); one_overlaps_three.add(new Interval("1", 1, 100)); one_overlaps_three.add(new Interval("1", 101, 200)); one_overlaps_three.add(new Interval("1", 202, 300)); one_overlaps_three.add(new Interval("2", 200, 300)); new Object[]{list1, IntervalList.invert(list1), new IntervalList(list1.getHeader())}, // should be empty new Object[]{list2, list1, two_overlaps_one}, new Object[]{list3, list2, three_overlaps_two},
/**
 * Builds a reference-sequence mask from the given interval list.
 * The intervals are sorted (if needed) and uniqued before use.
 *
 * @param intervalList the intervals defining the masked loci
 */
public IntervalListReferenceSequenceMask(final IntervalList intervalList) {
    this.header = intervalList.getHeader();
    // BUG FIX: sorted() returns a NEW IntervalList; the previous code called it and
    // discarded the result, making the sort-order guard a no-op. Capture the sorted
    // copy so the intended normalization actually takes effect before uniquing.
    final IntervalList coordinateSorted =
            intervalList.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate
                    ? intervalList.sorted()
                    : intervalList;
    final List<Interval> uniqueIntervals = coordinateSorted.uniqued().getIntervals();
    if (uniqueIntervals.isEmpty()) {
        // Empty mask: sentinel values meaning "no masked positions".
        lastSequenceIndex = -1;
        lastPosition = 0;
    } else {
        final Interval lastInterval = uniqueIntervals.get(uniqueIntervals.size() - 1);
        lastSequenceIndex = header.getSequenceIndex((lastInterval.getContig()));
        lastPosition = lastInterval.getEnd();
    }
    intervalIterator = new PeekableIterator<Interval>(uniqueIntervals.iterator());
}
final long genomeTerritory = this.intervals.getBaseCount() + otherMetric.intervals.getBaseCount(); this.intervals.addall(otherMetric.intervals.getIntervals()); this.intervals = this.intervals.uniqued(); if (this.intervals.getBaseCount() != genomeTerritory) { throw new PicardException("Trying to merge WgsMetrics calculated on intervals that overlap.");