/**
 * Merges a sorted collection of intervals into a single interval that spans all of them.
 * The sequence name and strand are taken from the first interval; the start and end are the
 * minimum start and maximum end seen across the whole set.
 *
 * @param intervals        the intervals to merge; {@code first()} and {@code last()} are called
 *                         on it, so it must not be empty
 * @param concatenateNames if true, the unique non-null names are joined with "|" via
 *                         {@code StringUtil.join}; if false, the first non-null name is used
 * @return the merged interval; its name is {@code null} when none of the inputs has a name
 */
static Interval merge(final SortedSet<Interval> intervals, final boolean concatenateNames) {
    final String chrom = intervals.first().getSequence();
    int start = intervals.first().getStart();
    int end = intervals.last().getEnd();
    final boolean neg = intervals.first().isNegativeStrand();
    // LinkedHashSet de-duplicates names while keeping the order in which they were first seen.
    final LinkedHashSet<String> names = new LinkedHashSet<String>();
    final String name;

    for (final Interval i : intervals) {
        if (i.getName() != null) {
            names.add(i.getName());
        }
        start = Math.min(start, i.getStart());
        end = Math.max(end, i.getEnd());
    }

    if (names.isEmpty()) {
        // Without this guard names.iterator().next() throws NoSuchElementException
        // when every input interval is unnamed.
        name = null;
    } else if (concatenateNames) {
        name = StringUtil.join("|", names);
    } else {
        name = names.iterator().next();
    }

    return new Interval(chrom, start, end, neg, name);
}
/**
 * Collapses a sorted set of intervals into one interval covering all of them.
 * Contig and strand come from the first interval; the span runs from the smallest start
 * to the largest end found in the set.
 *
 * @param intervals        the intervals to collapse; must contain at least one element
 * @param concatenateNames true to join the distinct non-null names with "|", false to keep
 *                         only the first non-null name
 * @return the covering interval, named {@code null} if no input interval carries a name
 */
static Interval merge(final SortedSet<Interval> intervals, final boolean concatenateNames) {
    final Interval firstInterval = intervals.first();
    final String contig = firstInterval.getContig();
    int mergedStart = firstInterval.getStart();
    int mergedEnd = intervals.last().getEnd();
    final boolean negativeStrand = firstInterval.isNegativeStrand();

    // Insertion-ordered set: duplicates are dropped, first-seen order is kept.
    final LinkedHashSet<String> distinctNames = new LinkedHashSet<>();
    for (final Interval current : intervals) {
        final String currentName = current.getName();
        if (currentName != null) {
            distinctNames.add(currentName);
        }
        final int currentStart = current.getStart();
        if (currentStart < mergedStart) {
            mergedStart = currentStart;
        }
        final int currentEnd = current.getEnd();
        if (currentEnd > mergedEnd) {
            mergedEnd = currentEnd;
        }
    }

    final String mergedName;
    if (distinctNames.isEmpty()) {
        mergedName = null;
    } else {
        mergedName = concatenateNames
                ? StringUtil.join("|", distinctNames)
                : distinctNames.iterator().next();
    }

    return new Interval(contig, mergedStart, mergedEnd, negativeStrand, mergedName);
}
/**
 * Reports, for every chain overlapping the given interval, how the overlapping part lifts over.
 *
 * @param interval the interval to examine; must not be zero-length
 * @return one PartialLiftover per overlapping chain. When the chain yields no target
 *         intersection the entry holds only the intersecting region and the chain id;
 *         otherwise it also holds the target interval and the intersection length divided
 *         by the full interval length.
 * @throws IllegalArgumentException if the interval has length 0
 */
public List<PartialLiftover> diagnosticLiftover(final Interval interval) {
    if (interval.length() == 0) {
        throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " + interval.getName());
    }

    final List<PartialLiftover> partials = new ArrayList<PartialLiftover>();
    for (final Chain chain : chains.getOverlaps(interval)) {
        final Interval overlapWithChain = interval.intersect(chain.interval);
        final TargetIntersection hit = targetIntersection(chain, overlapWithChain);
        if (hit != null) {
            final Interval lifted = createToInterval(interval.getName(), hit);
            // Fraction of the whole query interval (not just the overlap) that was mapped.
            final float liftedFraction = hit.intersectionLength / (float) interval.length();
            partials.add(new PartialLiftover(overlapWithChain, lifted, hit.chain.id, liftedFraction));
        } else {
            partials.add(new PartialLiftover(overlapWithChain, chain.id));
        }
    }
    return partials;
}
/**
 * Orders two intervals by, in turn: the index of their sequence as reported by the header,
 * start position, end position, strand (positive before negative) and finally name
 * (a null name sorts before any non-null name; non-null names use String.compareTo).
 *
 * @param lhs the left-hand interval
 * @param rhs the right-hand interval
 * @return a negative value, zero, or a positive value as lhs sorts before, equal to, or after rhs
 */
public int compare(final Interval lhs, final Interval rhs) {
    final int lhsIndex = this.header.getSequenceIndex(lhs.getSequence());
    final int rhsIndex = this.header.getSequenceIndex(rhs.getSequence());

    // Integer.compare instead of subtraction: a difference of two ints can overflow
    // and flip the sign of the result.
    int retval = Integer.compare(lhsIndex, rhsIndex);

    if (retval == 0) {
        retval = Integer.compare(lhs.getStart(), rhs.getStart());
    }
    if (retval == 0) {
        retval = Integer.compare(lhs.getEnd(), rhs.getEnd());
    }
    if (retval == 0) {
        if (lhs.isPositiveStrand() && rhs.isNegativeStrand()) {
            retval = -1;
        } else if (lhs.isNegativeStrand() && rhs.isPositiveStrand()) {
            retval = 1;
        }
    }
    if (retval == 0) {
        if (lhs.getName() == null) {
            return rhs.getName() == null ? 0 : -1;
        } else if (rhs.getName() == null) {
            return 1;
        } else {
            return lhs.getName().compareTo(rhs.getName());
        }
    }

    return retval;
}
}
/**
 * Reports, for every chain overlapping the given interval, how the overlapping part lifts over.
 *
 * @param interval the interval to examine; must not be zero-length
 * @return one PartialLiftover per overlapping chain. When the chain yields no target
 *         intersection the entry holds only the intersecting region and the chain id;
 *         otherwise it also holds the target interval and the intersection length divided
 *         by the full interval length.
 * @throws IllegalArgumentException if the interval has length 0
 */
public List<PartialLiftover> diagnosticLiftover(final Interval interval) {
    if (interval.length() == 0) {
        throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " + interval.getName());
    }

    final List<PartialLiftover> partials = new ArrayList<PartialLiftover>();
    for (final Chain chain : chains.getOverlaps(interval)) {
        final Interval overlapWithChain = interval.intersect(chain.interval);
        final TargetIntersection hit = targetIntersection(chain, overlapWithChain);
        if (hit != null) {
            // The query's own name and strand are passed through to the target interval builder.
            final Interval lifted = createToInterval(interval.getName(), interval.isNegativeStrand(), hit);
            // Fraction of the whole query interval (not just the overlap) that was mapped.
            final float liftedFraction = hit.intersectionLength / (float) interval.length();
            partials.add(new PartialLiftover(overlapWithChain, lifted, hit.chain.id, liftedFraction));
        } else {
            partials.add(new PartialLiftover(overlapWithChain, chain.id));
        }
    }
    return partials;
}
/**
 * Builds a per-chain diagnostic of how the given interval lifts over.
 *
 * @param interval the query interval; a zero-length interval is rejected
 * @return a list with one entry for each chain that overlaps the query: either a bare
 *         (intersecting region, chain id) entry when no target intersection is found, or an
 *         entry that additionally carries the target interval and the ratio of the
 *         intersection length to the query length
 * @throws IllegalArgumentException if {@code interval.length()} is 0
 */
public List<PartialLiftover> diagnosticLiftover(final Interval interval) {
    final int queryLength = interval.length();
    if (queryLength == 0) {
        throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " + interval.getName());
    }

    final List<PartialLiftover> results = new ArrayList<PartialLiftover>();
    for (final Chain candidate : chains.getOverlaps(interval)) {
        final Interval clipped = interval.intersect(candidate.interval);
        final TargetIntersection mapped = targetIntersection(candidate, clipped);

        if (mapped == null) {
            // Nothing in this chain maps the clipped region; record the miss and move on.
            results.add(new PartialLiftover(clipped, candidate.id));
            continue;
        }

        final Interval target = createToInterval(interval.getName(), interval.isNegativeStrand(), mapped);
        final float ratio = mapped.intersectionLength / (float) queryLength;
        results.add(new PartialLiftover(clipped, target, mapped.chain.id, ratio));
    }
    return results;
}
/**
 * Reads the interval list from INPUT, optionally sorts it (when SORT is set), and writes one
 * tab-separated line per interval to OUTPUT with the fields: contig, start - 1, end, name,
 * SCORE and strand ("-" for negative, "+" otherwise).
 *
 * @return 0 once all intervals have been written
 * @throws RuntimeIOException if an IOException occurs while writing or closing the output
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    IntervalList intervals = IntervalList.fromFile(INPUT);
    if (SORT) {
        intervals = intervals.sorted();
    }

    // try-with-resources closes the writer even when a write fails part-way through;
    // previously the writer leaked on any exception thrown before out.close().
    try (final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT)) {
        for (final Interval i : intervals) {
            final String strand = i.isNegativeStrand() ? "-" : "+";
            // NOTE(review): start - 1 presumably converts a 1-based start to BED's 0-based start; confirm.
            final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart() - 1, i.getEnd(), i.getName(), SCORE, strand);
            out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t")));
            out.newLine();
        }
    } catch (IOException ioe) {
        throw new RuntimeIOException(ioe);
    }

    return 0;
}
}
/**
 * Reads the interval list from INPUT, optionally sorts it (when SORT is set), and writes one
 * tab-separated line per interval to OUTPUT with the fields: contig, start - 1, end, name,
 * SCORE and strand ("-" for negative, "+" otherwise).
 *
 * @return 0 once all intervals have been written
 * @throws RuntimeIOException if an IOException occurs while writing or closing the output
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    IntervalList intervals = IntervalList.fromFile(INPUT);
    if (SORT) {
        intervals = intervals.sorted();
    }

    // try-with-resources closes the writer even when a write fails part-way through;
    // previously the writer leaked on any exception thrown before out.close().
    try (final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT)) {
        for (final Interval i : intervals) {
            final String strand = i.isNegativeStrand() ? "-" : "+";
            // NOTE(review): start - 1 presumably converts a 1-based start to BED's 0-based start; confirm.
            final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart() - 1, i.getEnd(), i.getName(), SCORE, strand);
            out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t")));
            out.newLine();
        }
    } catch (IOException ioe) {
        throw new RuntimeIOException(ioe);
    }

    return 0;
}
}
/**
 * Lays out baits of designer.BAIT_SIZE bases across the target, starting at the target start
 * and stepping by designer.BAIT_OFFSET.
 *
 * @param designer  supplies the bait size, the offset, the bait naming and the strand setting
 * @param target    the interval to cover; its contig, strand and name are copied onto each bait
 * @param reference the reference sequence used to bound bait starts and to fill in bait bases
 * @return the baits in order of increasing start position
 */
@Override
List<Bait> design(final BaitDesigner designer, final Interval target, final ReferenceSequence reference) {
    final int size = designer.BAIT_SIZE;
    final int step = designer.BAIT_OFFSET;
    final int firstStart = target.getStart();

    // Upper bound on bait starts: the target end, or earlier if a bait would not fit in the reference.
    final int startLimit = Math.min(target.getEnd(), reference.length() - size);
    // Total passed to makeBaitName for every bait.
    final int expectedBaits = 1 + (int) Math.floor((startLimit - firstStart) / (double) step);

    final List<Bait> designed = new LinkedList<Bait>();
    int ordinal = 0;
    int start = firstStart;
    // Strictly-less-than: a bait starting exactly at startLimit is not emitted.
    while (start < startLimit) {
        ordinal++;
        final Bait bait = new Bait(
                target.getContig(),
                start,
                CoordMath.getEnd(start, size),
                target.isNegativeStrand(),
                designer.makeBaitName(target.getName(), ordinal, expectedBaits));
        bait.addBases(reference, designer.DESIGN_ON_TARGET_STRAND);
        designed.add(bait);
        start += step;
    }
    return designed;
}
};
/**
 * Produces fixed-size baits at a fixed stride over the target.
 * Bait length is designer.BAIT_SIZE and consecutive bait starts differ by designer.BAIT_OFFSET.
 *
 * @param designer  source of the size/offset settings, bait names and the strand flag
 * @param target    the region being tiled; each bait inherits its contig, strand and base name
 * @param reference the reference sequence; limits where baits may start and provides bases
 * @return the generated baits, earliest start first
 */
@Override
List<Bait> design(final BaitDesigner designer, final Interval target, final ReferenceSequence reference) {
    final List<Bait> out = new LinkedList<Bait>();
    final int length = designer.BAIT_SIZE;
    final int stride = designer.BAIT_OFFSET;

    // No bait may start at or beyond this position (loop bound below is exclusive).
    final int latestStart = Math.min(target.getEnd(), reference.length() - length);
    final double span = latestStart - target.getStart();
    final int total = 1 + (int) Math.floor(span / (double) stride);

    int index = 0;
    for (int pos = target.getStart(); pos < latestStart; pos += stride) {
        index += 1;
        final int baitEnd = CoordMath.getEnd(pos, length);
        final String baitName = designer.makeBaitName(target.getName(), index, total);
        final Bait next = new Bait(target.getContig(), pos, baitEnd, target.isNegativeStrand(), baitName);
        next.addBases(reference, designer.DESIGN_ON_TARGET_STRAND);
        out.add(next);
    }
    return out;
}
};
/**
 * Two intervals with identical coordinates and strand but different names must be reduced by
 * uniqued() to a single interval whose name is the two names joined with "|".
 */
@Test
public void testUnionSamePosition() {
    final IntervalList list = new IntervalList(fileHeader);
    list.addall(Arrays.asList(
            new Interval("1", 2, 100, true, "test1"),
            new Interval("1", 2, 100, true, "test2")));

    final List<Interval> merged = list.uniqued().getIntervals();

    Assert.assertEquals(merged.size(), 1);
    final Interval survivor = merged.get(0);
    Assert.assertEquals(survivor.getName(), "test1|test2");
}
@Override protected int doWork() { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true); if (!refFile.isIndexed()) { throw new IllegalStateException("Reference file must be indexed, but no index file was found"); } if (refFile.getSequenceDictionary() == null) { throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found"); } // get the intervals final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE); log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())); /********************************** * Now output regions for calling * **********************************/ final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone()); log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE)); intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add); log.info("Writing Intervals."); outputIntervals.write(OUTPUT); log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())); return 0; }
@Override protected int doWork() { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true); if (!refFile.isIndexed()) { throw new IllegalStateException("Reference file must be indexed, but no index file was found"); } if (refFile.getSequenceDictionary() == null) { throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found"); } // get the intervals final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE); log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())); /********************************** * Now output regions for calling * **********************************/ final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone()); log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE)); intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add); log.info("Writing Intervals."); outputIntervals.write(OUTPUT); log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())); return 0; }
/**
 * Builds a new IntervalList in which every interval of this list is widened.
 * The start is moved down by {@code before} bases but never below 1, and the end is moved up
 * by {@code after} bases but never past the length of the interval's sequence in the
 * dictionary. Strand and name are carried over unchanged.
 *
 * @param before number of bases to subtract from each start; must be >= 0
 * @param after  number of bases to add to each end; must be >= 0
 * @return a new list (with a cloned header) holding the widened intervals
 * @throws IllegalArgumentException if either padding value is negative
 */
public IntervalList padded(final int before, final int after) {
    if (Math.min(before, after) < 0) {
        throw new IllegalArgumentException("Padding values must be >= 0.");
    }

    final IntervalList result = new IntervalList(this.getHeader().clone());
    final SAMSequenceDictionary dictionary = result.getHeader().getSequenceDictionary();

    for (final Interval original : this) {
        final String contig = original.getContig();
        final int contigLength = dictionary.getSequence(contig).getSequenceLength();
        final int paddedStart = Math.max(1, original.getStart() - before);
        final int paddedEnd = Math.min(contigLength, original.getEnd() + after);
        result.add(new Interval(contig, paddedStart, paddedEnd, original.isNegativeStrand(), original.getName()));
    }

    return result;
}
/**
 * Produces a copy of this list with each interval extended on both sides.
 * Extended coordinates are clamped to the range [1, sequence length], where the sequence
 * length is read from the dictionary of the copied header.
 *
 * @param before bases to extend each interval by at its start; negative values are rejected
 * @param after  bases to extend each interval by at its end; negative values are rejected
 * @return the new, extended IntervalList
 * @throws IllegalArgumentException if {@code before} or {@code after} is below zero
 */
public IntervalList padded(final int before, final int after) {
    final boolean paddingValid = before >= 0 && after >= 0;
    if (!paddingValid) {
        throw new IllegalArgumentException("Padding values must be >= 0.");
    }

    final IntervalList extended = new IntervalList(this.getHeader().clone());
    final SAMSequenceDictionary sequences = extended.getHeader().getSequenceDictionary();

    for (final Interval source : this) {
        final SAMSequenceRecord record = sequences.getSequence(source.getContig());

        // Clamp on the low side to 1 and on the high side to the sequence length.
        int newStart = source.getStart() - before;
        if (newStart < 1) {
            newStart = 1;
        }
        int newEnd = source.getEnd() + after;
        if (newEnd > record.getSequenceLength()) {
            newEnd = record.getSequenceLength();
        }

        extended.add(new Interval(source.getContig(), newStart, newEnd, source.isNegativeStrand(), source.getName()));
    }

    return extended;
}
@Test(dataProvider = "VCFCompData") public void testFromVCF(final Path vcf, final Path compInterval, final boolean invertVCF) { final IntervalList compList = IntervalList.fromPath(compInterval); final IntervalList list = invertVCF ? IntervalList.invert(VCFFileReader.toIntervalList(vcf)) : VCFFileReader.toIntervalList(vcf); compList.getHeader().getSequenceDictionary().assertSameDictionary(list.getHeader().getSequenceDictionary()); final Collection<Interval> intervals = CollectionUtil.makeCollection(list.iterator()); final Collection<Interval> compIntervals = CollectionUtil.makeCollection(compList.iterator()); //assert that the intervals correspond Assert.assertEquals(intervals, compIntervals); final List<String> intervalNames = new LinkedList<>(); final List<String> compIntervalNames = new LinkedList<>(); for (final Interval interval : intervals) { intervalNames.add(interval.getName()); } for (final Interval interval : compIntervals) { compIntervalNames.add(interval.getName()); } //assert that the names match Assert.assertEquals(intervalNames, compIntervalNames); }
@Test(dataProvider = "VCFCompData") public void testFromVCFWithPath(final Path vcf, final Path compInterval, final boolean invertVCF) { final IntervalList compList = IntervalList.fromPath(compInterval); final IntervalList list = invertVCF ? IntervalList.invert(VCFFileReader.toIntervalList(vcf)) : VCFFileReader.toIntervalList(vcf); compList.getHeader().getSequenceDictionary().assertSameDictionary(list.getHeader().getSequenceDictionary()); final Collection<Interval> intervals = CollectionUtil.makeCollection(list.iterator()); final Collection<Interval> compIntervals = CollectionUtil.makeCollection(compList.iterator()); //assert that the intervals correspond Assert.assertEquals(intervals, compIntervals); final List<String> intervalNames = new LinkedList<>(); final List<String> compIntervalNames = new LinkedList<>(); for (final Interval interval : intervals) { intervalNames.add(interval.getName()); } for (final Interval interval : compIntervals) { compIntervalNames.add(interval.getName()); } //assert that the names match Assert.assertEquals(intervalNames, compIntervalNames); }
/**
 * Splits an interval so that the first part brings the current size up to the ideal weight.
 *
 * @param interval               the interval to take bases from
 * @param idealSplitWeight       the weight the current split should reach
 * @param currentSize            the weight already accumulated
 * @param projectSizeOfRemaining not used by this implementation
 * @return a two-element list (taken, remainder): (interval, null) when the whole interval
 *         fits, (null, interval) when nothing can be taken, otherwise the left and right
 *         pieces, both keeping the original contig, strand and name
 */
@Override
public List<Interval> takeSome(final Interval interval, final long idealSplitWeight, final long currentSize, final double projectSizeOfRemaining) {
    final long basesToTake = idealSplitWeight - currentSize;

    if (basesToTake >= interval.length()) {
        return CollectionUtil.makeList(interval, null);
    }
    if (basesToTake == 0) {
        return CollectionUtil.makeList(null, interval);
    }

    final String contig = interval.getContig();
    final boolean negative = interval.isNegativeStrand();
    final String name = interval.getName();

    // First base of the remainder; the taken piece ends one base before it.
    final int remainderStart = interval.getStart() + (int) basesToTake;

    final Interval left = new Interval(contig, interval.getStart(), remainderStart - 1, negative, name);
    final Interval right = new Interval(contig, remainderStart, interval.getEnd(), negative, name);
    return CollectionUtil.makeList(left, right);
}
}
/**
 * Carves off the leading portion of an interval needed to reach the ideal split weight.
 *
 * @param interval               the interval being divided
 * @param idealSplitWeight       target weight for the split being filled
 * @param currentSize            weight the split already holds
 * @param projectSizeOfRemaining ignored here
 * @return a list of exactly two entries: the part taken and the part left over. Either entry
 *         is null when that part is absent (whole interval taken, or nothing taken).
 */
@Override
public List<Interval> takeSome(final Interval interval, final long idealSplitWeight, final long currentSize, final double projectSizeOfRemaining) {
    final long amount = idealSplitWeight - currentSize;
    final boolean takeEverything = amount >= interval.length();

    if (takeEverything) {
        return CollectionUtil.makeList(interval, null);
    } else if (amount == 0) {
        return CollectionUtil.makeList(null, interval);
    } else {
        final int takenBases = (int) amount;
        final Interval head = new Interval(
                interval.getContig(),
                interval.getStart(),
                interval.getStart() + takenBases - 1,
                interval.isNegativeStrand(),
                interval.getName());
        final Interval tail = new Interval(
                interval.getContig(),
                interval.getStart() + takenBases,
                interval.getEnd(),
                interval.isNegativeStrand(),
                interval.getName());
        return CollectionUtil.makeList(head, tail);
    }
}
}
/**
 * Builds an IntervalList on one chromosome from consecutive (start, end) pairs.
 * Each new interval takes its strand and name from the interval of {@code source} that
 * lookupIntervalContainingLocus returns for the pair's start position.
 *
 * @param source         list providing the header and the parent intervals
 * @param chromosome     contig used for every interval created and for the parent lookup
 * @param segmentsByPair flat sequence of coordinates read two at a time as start, end
 * @return a new IntervalList sharing the header of {@code source}
 */
private static IntervalList composeIntervalList(final IntervalList source, final String chromosome, final int... segmentsByPair) {
    final IntervalList composed = new IntervalList(source.getHeader());

    int cursor = 0;
    while (cursor < segmentsByPair.length) {
        final int segmentStart = segmentsByPair[cursor];
        final Interval parent = lookupIntervalContainingLocus(source, chromosome, segmentStart);
        final int segmentEnd = segmentsByPair[cursor + 1];
        composed.add(new Interval(chromosome, segmentStart, segmentEnd, parent.isNegativeStrand(), parent.getName()));
        cursor += 2;
    }

    return composed;
}