return clone(); if(bamFileSpan.isEmpty()) return clone(); validateSorted(); final BAMFileSpan trimmedChunkList = new BAMFileSpan(); final long chunkEnd = bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd(); for(final Chunk chunkToTrim: chunks) { if(chunkToTrim.getChunkEnd() <= chunkEnd) { trimmedChunkList.add(chunkToTrim.clone()); trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(),chunkEnd));
@Override public CloseableIterator<SAMRecord> getIterator(final SAMFileSpan chunks) { if (mStream == null) { throw new IllegalStateException("File reader is closed"); } if (mCurrentIterator != null) { throw new IllegalStateException("Iteration in progress"); } if (!(chunks instanceof BAMFileSpan)) { throw new IllegalStateException("BAMFileReader cannot handle this type of file span."); } // Create an iterator over the given chunk boundaries. mCurrentIterator = new BAMFileIndexIterator(((BAMFileSpan)chunks).toCoordinateArray()); return mCurrentIterator; }
/**
 * Gathers the chunks of every bin overlapping the requested region into one span,
 * adding each distinct chunk only the first time it is encountered.
 *
 * @param referenceIndex index of the reference sequence to query
 * @param startPos start position of the region
 * @param endPos end position of the region
 * @return a span holding the de-duplicated chunks in encounter order
 */
@Override
public BAMFileSpan getSpanOverlapping(int referenceIndex, int startPos, int endPos) {
    final BinList overlappingBins = getBinsOverlapping(referenceIndex, startPos, endPos);
    final BAMFileSpan collected = new BAMFileSpan();
    final Set<Chunk> seen = new HashSet<Chunk>();

    for (final Bin bin : overlappingBins) {
        for (final Chunk candidate : getSpanOverlapping(bin).getChunks()) {
            // Set.add reports false for an element already present, so it serves as the duplicate check.
            if (seen.add(candidate)) {
                collected.add(candidate);
            }
        }
    }
    return collected;
}
/**
 * Looks up the overlapping file span for each query interval, merges those spans and
 * returns the merged span as a coordinate array.
 *
 * @param index the index used to resolve each interval to a file span
 * @param queries the intervals to resolve, processed in array order
 * @return the coordinate array of the merged span
 */
private static long[] coordinatesFromQueryIntervals(BAMIndex index, QueryInterval[] queries) {
    final BAMFileSpan[] perQuerySpans = new BAMFileSpan[queries.length];
    int slot = 0;
    for (final QueryInterval query : queries) {
        perQuerySpans[slot++] = index.getSpanOverlapping(query.referenceIndex, query.start, query.end);
    }
    final BAMFileSpan merged = BAMFileSpan.merge(perQuerySpans);
    return merged.toCoordinateArray();
}
/**
 * Rebuilds a chunk list from its flat coordinate-array form, in which every
 * even-numbered index holds a start coordinate and the odd-numbered index
 * that follows holds the matching stop coordinate.
 * The resulting list is passed through validateSorted() before it is returned.
 * @param coordinateArray Flattened start/stop pairs to convert.
 * @return A list of chunks, one per start/stop pair.
 * @throws SAMException if the array has an odd number of entries.
 */
protected static SAMFileSpan toChunkList(final long[] coordinateArray) {
    final int length = coordinateArray.length;
    if ((length & 1) != 0) {
        throw new SAMException("Data supplied does not appear to be in coordinate array format.");
    }

    final BAMFileSpan span = new BAMFileSpan();
    int cursor = 0;
    while (cursor < length) {
        final long start = coordinateArray[cursor++];
        final long stop = coordinateArray[cursor++];
        span.add(new Chunk(start, stop));
    }

    span.validateSorted();
    return span;
}
long splitStart = virtualSplit.getStartVirtualOffset(); long splitEnd = virtualSplit.getEndVirtualOffset(); BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(splitStart, splitEnd)); BAMFileSpan span = fileToSpan.get(virtualSplit.getPath()); span = (BAMFileSpan) span.removeContentsBefore(splitSpan); span = (BAMFileSpan) span.removeContentsAfter(splitSpan); if (!span.getChunks().isEmpty()) { filteredSplits.add(new FileVirtualSplit(virtualSplit.getPath(), splitStart, splitEnd, virtualSplit.getLocations(), span.toCoordinateArray()));
/**
 * Produces a deep copy of this span: each chunk is cloned individually and the
 * clones are placed, in the same order, into a new span.
 * @return A new span holding clones of this span's chunks.
 */
public BAMFileSpan clone() {
    final BAMFileSpan copy = new BAMFileSpan();
    for (final Chunk original : chunks) {
        final Chunk duplicate = original.clone();
        copy.chunks.add(duplicate);
    }
    return copy;
}
} else { long chunkStart = index.getStartOfLastLinearBin(); long totalRecordRange = ((BAMFileSpan) reader.indexing().getFilePointerSpanningReads()).toCoordinateArray()[1]; span = new BAMFileSpan(new Chunk(chunkStart, totalRecordRange));
/**
 * Gets a file span over the data immediately following this span.
 * @return A span of one chunk that starts at the end of this span's last chunk
 *         and ends at {@code Long.MAX_VALUE}.
 * @throws SAMException if this span contains no chunks.
 */
public SAMFileSpan getContentsFollowing() {
    if (chunks.isEmpty()) {
        throw new SAMException("Unable to get the file pointer following this one: no data present.");
    }
    validateSorted();

    final Chunk lastChunk = chunks.get(chunks.size() - 1);
    final Chunk remainder = new Chunk(lastChunk.getChunkEnd(), Long.MAX_VALUE);
    return new BAMFileSpan(remainder);
}
/**
 * Compares this span with another object. Two spans are equal when the other object is a
 * {@code BAMFileSpan} whose chunk list has the same size and pairwise-equal chunks in the
 * same order.
 *
 * @param other the object to compare against
 * @return true if {@code other} is a {@code BAMFileSpan} with an equal chunk sequence
 */
@Override
public boolean equals(final Object other) {
    if (!(other instanceof BAMFileSpan)) {
        return false;
    }

    final List<Chunk> mine = getChunks();
    final List<Chunk> theirs = ((BAMFileSpan) other).getChunks();
    final int count = mine.size();
    if (count != theirs.size()) {
        return false;
    }

    // Advance while the chunks at the same position match; reaching the end means all matched.
    int position = 0;
    while (position < count && mine.get(position).equals(theirs.get(position))) {
        position++;
    }
    return position == count;
}
/**
 * Builds one span from the per-interval spans of all given intervals and returns an
 * iterator over it.
 *
 * @param intervals the intervals to query, processed in array order
 * @param contained when true each interval is resolved with {@code getSpanContained};
 *        otherwise with the browseable index's {@code getSpanOverlapping}
 * @return the iterator obtained from {@code getIterator} for the combined span
 */
@Override
public CloseableIterator<SAMRecord> query(QueryInterval[] intervals, boolean contained) {
    final BAMFileSpan combined = new BAMFileSpan();
    final BrowseableBAMIndex index = getBrowseableIndex();

    for (final QueryInterval interval : intervals) {
        final BAMFileSpan intervalSpan = contained
                ? getSpanContained(interval.referenceIndex, interval.start, interval.end)
                : index.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
        combined.add(intervalSpan);
    }

    return getIterator(combined);
}
@Test(dataProvider = "testRemoveContentsBeforeProvider") public void testRemoveContentsBefore(BAMFileSpan originalSpan, BAMFileSpan cutoff, BAMFileSpan expectedSpan) { // only start value in cutoff is used Assert.assertEquals( ((BAMFileSpan) originalSpan.removeContentsBefore(cutoff)).getChunks(), expectedSpan.getChunks()); }
@Test(dataProvider = "testRemoveContentsAfterProvider") public void testRemoveContentsAfter(BAMFileSpan originalSpan, BAMFileSpan cutoff, BAMFileSpan expectedSpan) { // only end value in cutoff is used Assert.assertEquals( ((BAMFileSpan) originalSpan.removeContentsAfter(cutoff)).getChunks(), expectedSpan.getChunks()); }
/**
 * Checks three things: the first ucsi region query yields an empty span, ucsi and ubai
 * return equal chunk lists for the same region, and the ucsi span for bin10 holds
 * three chunks.
 */
@Test
public static void testGetSpanOverlapping() {
    final BAMFileSpan ucsiFirstRegion = ucsi.getSpanOverlapping(1, 939520000, 939529000);
    final BAMFileSpan ucsiSecondRegion = ucsi.getSpanOverlapping(1, 240000000, 249228250);
    final BAMFileSpan ubaiSecondRegion = ubai.getSpanOverlapping(1, 240000000, 249228250);

    Assert.assertTrue(ucsiFirstRegion.isEmpty());
    Assert.assertEquals(ucsiSecondRegion.getChunks(), ubaiSecondRegion.getChunks());

    final int bin10ChunkCount = ucsi.getSpanOverlapping(bin10).getChunks().size();
    Assert.assertEquals(bin10ChunkCount, 3);
}
}
/**
 * Returns the single chunk of the record's file pointer.
 *
 * @return the chunk obtained from the record's file source
 * @throws SAMException if the record has no file source
 */
@Override
public Chunk getChunk() {
    final SAMFileSource recordSource = rec.getFileSource();
    if (recordSource != null) {
        final BAMFileSpan pointer = (BAMFileSpan) recordSource.getFilePointer();
        return pointer.getSingleChunk();
    }
    throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec);
}
});
/**
 * Passes the first offset of the record's file pointer to the index writer.
 *
 * @param indexWriter the writer that receives the record's offset
 * @param rec the record whose file source supplies the offset
 * @throws SAMException if the record has no file source
 */
private void processAlignment(final SBIIndexWriter indexWriter, final SAMRecord rec) {
    final SAMFileSource recordSource = rec.getFileSource();
    if (recordSource != null) {
        final BAMFileSpan pointer = (BAMFileSpan) recordSource.getFilePointer();
        indexWriter.processRecord(pointer.getFirstOffset());
        return;
    }
    throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec);
}
/**
 * Uses the index to find the file span overlapping each interval and merges the
 * per-interval spans into one.
 * @param intervals the intervals to restrict reads to
 * @param fileIndex the BAM index to use
 * @return the merged span, or {@code null} when {@code intervals} is empty
 */
public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIndex) {
    final int intervalCount = intervals.length;
    if (intervalCount == 0) {
        // Nothing to look up: an empty query is reported as null rather than an empty span.
        return null;
    }

    final BAMFileSpan[] perIntervalSpans = new BAMFileSpan[intervalCount];
    int slot = 0;
    for (final QueryInterval interval : intervals) {
        perIntervalSpans[slot++] =
                fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
    }
    return BAMFileSpan.merge(perIntervalSpans);
}
long splitStart = virtualSplit.getStartVirtualOffset(); long splitEnd = virtualSplit.getEndVirtualOffset(); BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(splitStart, splitEnd)); BAMFileSpan span = fileToSpan.get(virtualSplit.getPath()); if (span == null) { continue; span = (BAMFileSpan) span.removeContentsBefore(splitSpan); span = (BAMFileSpan) span.removeContentsAfter(splitSpan); if (!span.getChunks().isEmpty()) { filteredSplits.add(new FileVirtualSplit(virtualSplit.getPath(), splitStart, splitEnd, virtualSplit.getLocations(), span.toCoordinateArray()));
/**
 * Wraps the chunks of the given bin in a new span.
 *
 * @param bin the bin whose chunks are requested
 * @return a new span constructed from the result of {@code getBinChunks(bin)}
 */
@Override
public BAMFileSpan getSpanOverlapping(Bin bin) {
    final BAMFileSpan binSpan = new BAMFileSpan(getBinChunks(bin));
    return binSpan;
}
/**
 * Rebuilds a chunk list from its flat coordinate-array form, in which every
 * even-numbered index holds a start coordinate and the odd-numbered index
 * that follows holds the matching stop coordinate.
 * The resulting list is passed through validateSorted() before it is returned.
 * @param coordinateArray Flattened start/stop pairs to convert.
 * @return A list of chunks, one per start/stop pair.
 * @throws SAMException if the array has an odd number of entries.
 */
protected static SAMFileSpan toChunkList(final long[] coordinateArray) {
    final int length = coordinateArray.length;
    if ((length & 1) != 0) {
        throw new SAMException("Data supplied does not appear to be in coordinate array format.");
    }

    final BAMFileSpan span = new BAMFileSpan();
    int cursor = 0;
    while (cursor < length) {
        final long start = coordinateArray[cursor++];
        final long stop = coordinateArray[cursor++];
        span.add(new Chunk(start, stop));
    }

    span.validateSorted();
    return span;
}