/**
 * Locates the last aligned (mapped) entry of a CRAI index.
 * <p>
 * The list is assumed to be sorted by coordinate, with all unmapped entries
 * (sequence id = -1) placed after the mapped ones.
 *
 * @param list a list of CRAI entries
 * @return index of the last entry whose sequence id is not -1; -1 if the list is empty
 *         or holds no such entry
 */
public static int findLastAlignedEntry(final List<CRAIEntry> list) {
    if (list.isEmpty()) {
        return -1;
    }

    // Binary search for the mapped/unmapped boundary. Given the ordering assumption,
    // 'left' ends up on the first entry with a negative sequence id (or past the end).
    int left = 0;
    int right = list.size() - 1;
    while (left <= right) {
        final int middle = (left + right) >>> 1;
        if (list.get(middle).getSequenceId() < 0) {
            right = middle - 1;
        } else {
            left = middle + 1;
        }
    }

    // No entry with a negative sequence id was found: the final entry is the answer.
    if (left >= list.size()) {
        return list.size() - 1;
    }

    // Step back over entries with sequence id -1 until a different id (or the list start) is reached.
    int candidate = left;
    while (candidate >= 0 && list.get(candidate).getSequenceId() == -1) {
        candidate--;
    }
    return candidate;
}
/**
 * Selects the entries of an index that belong to a given sequence and, unless the whole
 * sequence is requested, intersect the queried region.
 * <p>
 * If {@code start} or {@code span} is less than 1, every entry of the sequence is returned.
 * Otherwise an entry is kept only when {@code CRAIEntry.intersect} accepts it against a query
 * entry built from {@code seqId}, {@code start} and {@code span}.
 *
 * @param list  the CRAI entries to search
 * @param seqId sequence id the returned entries must have
 * @param start query start; values below 1 request the whole sequence
 * @param span  query span; values below 1 request the whole sequence
 * @return a new list with the matching entries, sorted with {@code CRAIEntry.byStart}
 */
public static List<CRAIEntry> find(final List<CRAIEntry> list, final int seqId, final int start, final int span) {
    final boolean wholeSequence = start < 1 || span < 1;

    // Out-of-range query values are clamped before building the query entry.
    final int queryStart = Math.max(start, 1);
    final int querySpan = span < 1 ? Integer.MAX_VALUE : span;
    final CRAIEntry query = new CRAIEntry(seqId, queryStart, querySpan, Long.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE);

    final List<CRAIEntry> matches = new ArrayList<>();
    for (final CRAIEntry candidate : list) {
        final boolean sameSequence = candidate.getSequenceId() == seqId;
        if (sameSequence && (wholeSequence || CRAIEntry.intersect(candidate, query))) {
            matches.add(candidate);
        }
    }

    Collections.sort(matches, CRAIEntry.byStart);
    return matches;
}
@Test public void testFindLastAlignedEntry() { final List<CRAIEntry> index = new ArrayList<CRAIEntry>(); Assert.assertEquals(-1, CRAIIndex.findLastAlignedEntry(index)); // Scan all allowed combinations of 10 mapped/unmapped entries and assert the found last aligned entry: final int indexSize = 10; for (int lastAligned = 0; lastAligned < indexSize; lastAligned++) { index.clear(); for (int i = 0; i < indexSize; i++) { final CRAIEntry e = CRAIEntryTest.newEntrySeqStart(i <= lastAligned ? 0 : -1, i); index.add(e); } // check expectations are correct before calling findLastAlignedEntry method: Assert.assertTrue(index.get(lastAligned).getSequenceId() != -1); if (lastAligned < index.size() - 1) { Assert.assertTrue(index.get(lastAligned + 1).getSequenceId() == -1); } // assert the the found value matches the expectation: Assert.assertEquals(CRAIIndex.findLastAlignedEntry(index), lastAligned); } }
/**
 * Builds an entry via {@code newEntry} that takes its sequence id and alignment span from
 * an existing entry but uses a different alignment start.
 *
 * @param toClone        entry supplying the sequence id and alignment span
 * @param alignmentStart alignment start for the new entry
 * @return the entry produced by {@code newEntry}
 */
public static CRAIEntry updateStart(final CRAIEntry toClone, final int alignmentStart) {
    final int sequenceId = toClone.getSequenceId();
    final int alignmentSpan = toClone.getAlignmentSpan();
    return newEntry(sequenceId, alignmentStart, alignmentSpan);
}
/**
 * Checks the natural ordering applied by {@code Collections.sort} to CRAI entries.
 * Three pairs are each added in descending order and must come out ascending after sorting:
 * one differing in sequence id, one differing in alignment start, and one built with
 * different container start byte offsets.
 */
@Test
public void testCompareTo() {
    final List<CRAIEntry> entries = new ArrayList<>(2);

    // Scenario 1: different sequence ids.
    final CRAIEntry lowerSequence = newEntry(100, 0, 0);
    final CRAIEntry higherSequence = newEntry(200, 0, 0);
    entries.add(higherSequence);
    entries.add(lowerSequence);
    Assert.assertTrue(entries.get(1).getSequenceId() < entries.get(0).getSequenceId());
    Collections.sort(entries);
    Assert.assertTrue(entries.get(0).getSequenceId() < entries.get(1).getSequenceId());

    // Scenario 2: same sequence id, different alignment starts.
    entries.clear();
    final CRAIEntry earlierStart = newEntry(1, 100, 0);
    final CRAIEntry laterStart = newEntry(1, 200, 0);
    entries.add(laterStart);
    entries.add(earlierStart);
    Assert.assertTrue(entries.get(1).getAlignmentStart() < entries.get(0).getAlignmentStart());
    Collections.sort(entries);
    Assert.assertTrue(entries.get(0).getAlignmentStart() < entries.get(1).getAlignmentStart());

    // Scenario 3: entries created with different container start byte offsets.
    entries.clear();
    final CRAIEntry smallerOffset = newEntryContOffset(100);
    final CRAIEntry largerOffset = newEntryContOffset(200);
    entries.add(largerOffset);
    entries.add(smallerOffset);
    Assert.assertTrue(entries.get(1).getContainerStartByteOffset() < entries.get(0).getContainerStartByteOffset());
    Collections.sort(entries);
    Assert.assertTrue(entries.get(0).getContainerStartByteOffset() < entries.get(1).getContainerStartByteOffset());
}
/**
 * Checks that a CRAI entry built from a tab-separated text line exposes each of the six
 * columns through the matching getter. The container offset is chosen above
 * {@code Integer.MAX_VALUE} so that offsets outside the int range are covered.
 */
@Test
public void testFromCraiLine() {
    int counter = 1;
    final int sequenceId = counter++;
    final int alignmentStart = counter++;
    final int alignmentSpan = counter++;
    // Widen to long before adding: in int arithmetic MAX_VALUE + n wraps around to a negative value.
    final long containerOffset = Integer.MAX_VALUE + (long) counter++;
    final int sliceOffset = counter++;
    final int sliceSize = counter++;

    final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSize);
    final CRAIEntry entry = new CRAIEntry(line);

    Assert.assertNotNull(entry);
    Assert.assertEquals(entry.getSequenceId(), sequenceId);
    Assert.assertEquals(entry.getAlignmentStart(), alignmentStart);
    Assert.assertEquals(entry.getAlignmentSpan(), alignmentSpan);
    Assert.assertEquals(entry.getContainerStartByteOffset(), containerOffset);
    Assert.assertEquals(entry.getSliceByteOffset(), sliceOffset);
    Assert.assertEquals(entry.getSliceByteSize(), sliceSize);
}
/**
 * Checks that a container holding a single slice yields exactly one CRAI entry and that the
 * entry's fields mirror the corresponding slice fields.
 */
@Test
public void testFromContainer() {
    final Container container = new Container();
    final Slice onlySlice = new Slice();

    // Distinct values per field so that a mix-up between fields would be detected.
    onlySlice.sequenceId = 1;
    onlySlice.alignmentStart = 2;
    onlySlice.alignmentSpan = 3;
    onlySlice.containerOffset = 4;
    onlySlice.offset = 5;
    onlySlice.size = 6;

    container.landmarks = new int[]{7};
    container.slices = new Slice[]{onlySlice};

    final List<CRAIEntry> craiEntries = container.getCRAIEntries();
    Assert.assertNotNull(craiEntries);
    Assert.assertEquals(craiEntries.size(), 1);

    final CRAIEntry first = craiEntries.get(0);
    Assert.assertEquals(first.getSequenceId(), onlySlice.sequenceId);
    Assert.assertEquals(first.getAlignmentStart(), onlySlice.alignmentStart);
    Assert.assertEquals(first.getAlignmentSpan(), onlySlice.alignmentSpan);
    Assert.assertEquals(first.getContainerStartByteOffset(), onlySlice.containerOffset);
    Assert.assertEquals(first.getSliceByteOffset(), onlySlice.offset);
    Assert.assertEquals(first.getSliceByteSize(), onlySlice.size);
}
/**
 * Shared body for CRAI-to-BAI conversion tests: builds a one-entry CRAI index, converts it
 * with the supplied function, loads the result as a {@code DiskBasedBAMFileIndex} and checks
 * the span returned for the entry's start position.
 *
 * @param getBaiStreamForIndex converter that turns a sequence dictionary and a list of CRAI
 *                             entries into a BAI stream
 */
private void doCRAITest(BiFunction<SAMSequenceDictionary, List<CRAIEntry>, SeekableStream> getBaiStreamForIndex) {
    final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4);
    final ArrayList<CRAIEntry> index = new ArrayList<>();
    index.add(entry);

    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
    dictionary.addSequence(new SAMSequenceRecord("1", 100));

    final SeekableStream baiStream = getBaiStreamForIndex.apply(dictionary, index);
    final DiskBasedBAMFileIndex bamIndex = new DiskBasedBAMFileIndex(baiStream, dictionary);

    // Query a single position: the start of the only entry.
    final int queryPosition = entry.getAlignmentStart();
    final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), queryPosition, entry.getAlignmentStart());
    Assert.assertNotNull(span);

    final long[] coordinates = span.toCoordinateArray();
    Assert.assertEquals(coordinates.length, 2);
    // The first coordinate, shifted right by 16 bits, must equal the container start byte offset.
    Assert.assertEquals(coordinates[0] >> 16, entry.getContainerStartByteOffset());
    // The low 16 bits of the second coordinate are expected to be 1.
    Assert.assertEquals(coordinates[1] & 0xFFFF, 1);
}
public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStream, final SAMSequenceDictionary dictionary) { final List<CRAIEntry> full = CRAMCRAIIndexer.readIndex(indexStream).getCRAIEntries(); Collections.sort(full); final SAMFileHeader header = new SAMFileHeader(); header.setSequenceDictionary(dictionary); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); final CRAMBAIIndexer indexer = new CRAMBAIIndexer(baos, header); for (final CRAIEntry entry : full) { final Slice slice = new Slice(); slice.containerOffset = entry.getContainerStartByteOffset(); slice.alignmentStart = entry.getAlignmentStart(); slice.alignmentSpan = entry.getAlignmentSpan(); slice.sequenceId = entry.getSequenceId(); // NOTE: the recordCount and sliceIndex fields can't be derived from the CRAM index // so we can only set them to zero // see https://github.com/samtools/htsjdk/issues/531 slice.nofRecords = 0; slice.index = 0; slice.offset = entry.getSliceByteOffset(); indexer.processSingleReferenceSlice(slice); } indexer.finish(); return new SeekableMemoryStream(baos.toByteArray(), "CRAI to BAI converter"); }
/**
 * Builds an entry via {@code newEntry} that copies the sequence id, alignment span, slice
 * byte offset and slice byte size of an existing entry, while replacing its alignment start
 * and container start offset.
 *
 * @param toClone              entry supplying the unchanged fields
 * @param alignmentStart       alignment start for the new entry
 * @param containerStartOffset container start offset for the new entry
 * @return the entry produced by {@code newEntry}
 */
public static CRAIEntry updateStartContOffset(final CRAIEntry toClone, final int alignmentStart, final int containerStartOffset) {
    return newEntry(
            toClone.getSequenceId(),
            alignmentStart,
            toClone.getAlignmentSpan(),
            containerStartOffset,
            toClone.getSliceByteOffset(),
            toClone.getSliceByteSize());
}
}
/**
 * Runs {@code CRAIIndex.find} and verifies its result independently of
 * {@code CRAIEntry.intersect}: every returned entry must have the requested sequence id
 * (asserted) and must share at least one position with the query region.
 * <p>
 * Both regions are treated as closed intervals {@code [start, start + span]}. The overlap is
 * decided by comparing interval bounds rather than scanning every position, so the cost does
 * not depend on the size of the regions. The sums use plain int arithmetic.
 *
 * @param index      the CRAI entries to search
 * @param sequenceId sequence id of the query
 * @param start      start of the query region
 * @param span       span of the query region
 * @return {@code true} if at least one entry was found and all found entries overlap the
 *         query region; {@code false} otherwise
 */
private boolean allFoundEntriesIntersectQueryInFind(final List<CRAIEntry> index, final int sequenceId, final int start, final int span) {
    final int queryEnd = start + span;
    int foundCount = 0;

    for (final CRAIEntry found : CRAIIndex.find(index, sequenceId, start, span)) {
        foundCount++;
        Assert.assertEquals(found.getSequenceId(), sequenceId);

        final int foundStart = found.getAlignmentStart();
        final int foundEnd = foundStart + found.getAlignmentSpan();

        // Two closed intervals share a position exactly when the larger of the starts
        // does not exceed the smaller of the ends.
        final boolean intersects = Math.max(foundStart, start) <= Math.min(foundEnd, queryEnd);
        if (!intersects) {
            return false;
        }
    }

    return foundCount > 0;
}
/**
 * Writes a one-entry CRAI index to a temporary file, opens it by URL through
 * {@code SamIndexes.openIndexUrlAsBaiOrNull} and checks that the resulting BAI data
 * returns the expected span for the entry's start position.
 *
 * @throws IOException if the temporary file cannot be created, written or read
 */
@Test
public void testOpenIndexUrlAsBaiOrNull() throws IOException {
    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
    dictionary.addSequence(new SAMSequenceRecord("1", 100));

    final File file = File.createTempFile("test", ".crai");
    file.deleteOnExit();

    final SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4);

    // try-with-resources releases the file handle even if indexing throws.
    try (FileOutputStream fos = new FileOutputStream(file)) {
        final CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(fos, header);
        indexer.addEntry(entry);
        indexer.finish();
    }

    // Copy the BAI data into memory, then close the source stream.
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    try (InputStream baiStream = SamIndexes.openIndexUrlAsBaiOrNull(file.toURI().toURL(), dictionary)) {
        Assert.assertNotNull(baiStream);
        IOUtil.copyStream(baiStream, baos);
    }

    final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary);
    final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart());
    Assert.assertNotNull(span);

    final long[] coordinateArray = span.toCoordinateArray();
    Assert.assertEquals(coordinateArray.length, 2);
    // The first coordinate, shifted right by 16 bits, must equal the container start byte offset.
    Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset());
    Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
}
}
/**
 * Builds a one-entry CRAI index entirely in memory, converts it with
 * {@code SamIndexes.asBaiStreamOrNull} and checks that the resulting BAI data returns the
 * expected span for the entry's start position.
 *
 * @throws IOException if a stream operation fails
 */
@Test
public void testCraiInMemory() throws IOException {
    final SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SAMFileHeader.SortOrder.coordinate);

    // Step 1: serialize a single CRAI entry into a byte buffer.
    final ByteArrayOutputStream craiBytes = new ByteArrayOutputStream();
    final CRAMCRAIIndexer craiIndexer = new CRAMCRAIIndexer(craiBytes, header);
    final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4);
    craiIndexer.addEntry(entry);
    craiIndexer.finish();
    craiBytes.close();

    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
    dictionary.addSequence(new SAMSequenceRecord("1", 100));

    // Step 2: convert the CRAI bytes and copy the BAI stream into a second buffer.
    final InputStream baiStream = SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(craiBytes.toByteArray()), dictionary);
    Assert.assertNotNull(baiStream);
    final ByteArrayOutputStream baiBytes = new ByteArrayOutputStream();
    IOUtil.copyStream(baiStream, baiBytes);

    // Step 3: query the BAI index at the entry's start position.
    final SeekableMemoryStream baiMemoryStream = new SeekableMemoryStream(baiBytes.toByteArray(), null);
    final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(baiMemoryStream, dictionary);
    final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart());
    Assert.assertNotNull(span);

    final long[] coordinates = span.toCoordinateArray();
    Assert.assertEquals(coordinates.length, 2);
    Assert.assertEquals(coordinates[0] >> 16, entry.getContainerStartByteOffset());
    Assert.assertEquals(coordinates[1] & 0xFFFF, 1);
}
/**
 * Writes a one-entry CRAI index to a temporary file, converts it with
 * {@code SamIndexes.asBaiSeekableStreamOrNull} and checks that the resulting BAI index
 * returns the expected span for the entry's start position.
 *
 * @throws IOException if the temporary file cannot be created, written or read
 */
@Test
public void testCraiFromFile() throws IOException {
    final File file = File.createTempFile("test", ".crai");
    file.deleteOnExit();

    final SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    final CRAIEntry entry = CRAIEntryTest.newEntry(0, 1, 2, 5, 3, 4);

    // try-with-resources releases the file handle even if indexing throws.
    try (FileOutputStream fos = new FileOutputStream(file)) {
        final CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(fos, header);
        indexer.addEntry(entry);
        indexer.finish();
    }

    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
    dictionary.addSequence(new SAMSequenceRecord("1", 100));

    // The converted stream is handed to the index object below and is not closed here.
    final SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(new SeekableFileStream(file), dictionary);
    Assert.assertNotNull(baiStream);

    final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(baiStream, dictionary);
    final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.getSequenceId(), entry.getAlignmentStart(), entry.getAlignmentStart());
    Assert.assertNotNull(span);

    final long[] coordinateArray = span.toCoordinateArray();
    Assert.assertEquals(coordinateArray.length, 2);
    // The first coordinate, shifted right by 16 bits, must equal the container start byte offset.
    Assert.assertEquals(coordinateArray[0] >> 16, entry.getContainerStartByteOffset());
    Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
}