/**
 * Computes the largest bin number that can occur for a reference sequence.
 * The result is the first bin of the deepest level plus the sequence length
 * shifted right by the minimum shift.
 *
 * @param reference index of the reference sequence in the BAM dictionary
 * @return the maximum bin number for the reference, or {@code getMaxBins()} when
 *         any step of the computation throws (for example, no dictionary entry)
 */
private int getMaxBinNumberForReference(final int reference) {
    int maxBinNumber;
    try {
        final int length = getBamDictionary().getSequence(reference).getSequenceLength();
        final int firstDeepestBin = getFirstBinInLevelForCSI(getBinDepth() - 1);
        final int deepestBinOffset = length >> getMinShift();
        maxBinNumber = firstDeepestBin + deepestBinOffset;
    } catch (final Exception e) {
        // Best effort: fall back to the index-wide maximum when the length is unavailable.
        maxBinNumber = getMaxBins();
    }
    return maxBinNumber;
}
/**
 * Computes the first locus covered by the given bin: the bin's offset within
 * its level multiplied by the span of one bin on that level, plus one.
 *
 * @param bin the bin of interest
 * @return the first locus of the bin
 * @throws SAMException if {@code bin} is null or its number is greater than {@code getMaxBins()}
 */
@Override
public int getFirstLocusInBin(Bin bin) {
    final boolean usableBin = bin != null && bin.getBinNumber() <= getMaxBins();
    if (!usableBin) {
        throw new SAMException("Tried to get first locus for invalid bin: " + bin);
    }

    final int binLevel = getLevelForBin(bin);
    final int levelStart = getFirstBinInLevelForCSI(binLevel);
    final int binsOnLevel = getLevelSize(binLevel);

    final int offsetWithinLevel = bin.getBinNumber() - levelStart;
    final int spanPerBin = getMaxSpan() / binsOnLevel;
    return offsetWithinLevel * spanPerBin + 1;
}
/** * Return meta data for the given reference including information about number of aligned, unaligned, and noCoordinate records * * @param reference the reference of interest * @return meta data for the reference */ @Override public BAMIndexMetaData getMetaData(final int reference) { if (metaDataPos > 0 && position() != metaDataPos) { seek(metaDataPos); } final List<Chunk> metaDataChunks = new ArrayList<Chunk>(); final int sequenceCount = getNumberOfReferences(); if (reference >= sequenceCount) { return null; } skipToSequence(reference); final int binCount = readInteger(); // n_bin for (int binNumber = 0; binNumber < binCount; binNumber++) { final int indexBin = readInteger(); // bin final long lOffset = readLong(); // loffset final int nChunks = readInteger(); // n_chunk if (indexBin == getMaxBins() + 1) { readChunks(nChunks, metaDataChunks); } else { skipBytes(BAMFileConstants.CSI_CHUNK_SIZE * nChunks); } } return new BAMIndexMetaData(metaDataChunks); }
/**
 * Initializes the index parameters by invoking {@code readMinShiftAndBinDepth()},
 * then {@code readAuxDataAndNRef()}, and finally handing the number of references
 * to {@code setSequenceIndexes(int)}.
 */
@Override
protected final void initParameters() {
    // NOTE(review): the call order presumably mirrors the on-disk header layout; keep it as is.
    readMinShiftAndBinDepth();
    readAuxDataAndNRef();

    final int referenceCount = getNumberOfReferences();
    setSequenceIndexes(referenceCount);
}
@Override public long getStartOfLastLinearBin() { if (metaDataPos > 0 && position() != metaDataPos) { seek(metaDataPos); } final int sequenceIndex = getNumberOfReferences(); long loffset = -1L; for (int i = 0; i < sequenceIndex; i++) { final int nBins = readInteger(); // n_bin for (int j = 0; j < nBins; j++) { readInteger(); // bin loffset = readLong(); // loffset final int nChunks = readInteger(); // n_chunk skipBytes(BAMFileConstants.CSI_CHUNK_SIZE * nChunks); } } return loffset; }
/** * Returns count of records unassociated with any reference. Call before the index file is closed * * @return meta data at the end of the bam index that indicates count of records holding no coordinates * or null if no meta data (old index format) */ @Override public Long getNoCoordinateCount() { if (metaDataPos > 0 && position() != metaDataPos) { seek(metaDataPos); } skipToSequence(getNumberOfReferences()); try { // in case of old index file without meta data return readLong(); } catch (final Exception e) { return null; } }
@Override protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) { if (metaDataPos > 0 && position() != metaDataPos) { seek(metaDataPos); final int sequenceCount = getNumberOfReferences(); final BitSet regionBins = GenomicIndexUtil.regionToBins(startPos, endPos, getMinShift(), getBinDepth()); if (regionBins == null) { return null; skipToSequence(referenceSequence); final int binCount = readInteger(); // n_bin boolean metaDataSeen = false; final Bin[] bins = new BinWithOffset[getMaxBinNumberForReference(referenceSequence) +1]; for (int binNumber = 0; binNumber < binCount; binNumber++) { final int indexBin = readInteger(); // bin final long lOffset = readLong(); // l_offset final int nChunks = readInteger(); // n_chunk List<Chunk> chunks; if (regionBins.get(indexBin)) { chunks = new ArrayList<Chunk>(nChunks); readChunks(nChunks, chunks); } else if (indexBin == getMaxBins() + 1) { readChunks(nChunks, metaDataChunks); metaDataSeen = true; continue; // don't create a Bin
final BAMIndexContent queryResults = getQueryResults(referenceSequence); final int binLevel = getLevelForBin(bin); final int firstLocusInBin = getFirstLocusInBin(bin); long minimumOffset = bin instanceof BinWithOffset ? ((BinWithOffset)bin).getlOffset() : 0L; final int binStart = getFirstBinInLevelForCSI(currentBinLevel); final int binWidth = getMaxSpan()/getLevelSize(currentBinLevel); final int parentBinNumber = firstLocusInBin/binWidth + binStart; final Bin parentBin = queryResults.getBins().getBin(parentBinNumber);
@Override public BAMFileSpan getSpanOverlapping(int referenceIndex, int startPos, int endPos) { final BAMIndexContent queryResults = query(referenceIndex, startPos, endPos); int initialBinNumber = getFirstBinInLevelForCSI(getBinDepth() - 1) + (startPos - 1 >> getMinShift()); long minimumOffset = 0L; Bin targetBin; break; firstBinNumber = (getParentBinNumber(initialBinNumber)<<3) + 1; if (initialBinNumber > firstBinNumber) { initialBinNumber--; } else { initialBinNumber = getParentBinNumber(initialBinNumber);
/**
 * Finds the level a bin belongs to by scanning from the deepest level upwards and
 * returning the first level whose first bin number does not exceed the bin's number.
 *
 * @param bin the bin of interest
 * @return the level of the bin
 * @throws SAMException if {@code bin} is null, its number is greater than
 *         {@code getMaxBins()}, or no level matches
 */
@Override
public int getLevelForBin(Bin bin) {
    final boolean usableBin = bin != null && bin.getBinNumber() <= getMaxBins();
    if (!usableBin) {
        throw new SAMException("Tried to get level for invalid bin: " + bin);
    }

    int level = getBinDepth() - 1;
    while (level >= 0) {
        if (bin.getBinNumber() >= getFirstBinInLevelForCSI(level)) {
            return level;
        }
        level--;
    }

    throw new SAMException("Unable to find correct level for bin: " + bin);
}
/**
 * Builds the list of bins overlapping a region, using this index's minimum shift and bin depth.
 *
 * @param referenceIndex index of the reference sequence
 * @param startPos start of the region
 * @param endPos end of the region
 * @return the overlapping bins, or {@code null} when
 *         {@code GenomicIndexUtil.regionToBins} yields {@code null}
 */
@Override
public BinList getBinsOverlapping(int referenceIndex, int startPos, int endPos) {
    final BitSet overlappingBins =
            GenomicIndexUtil.regionToBins(startPos, endPos, getMinShift(), getBinDepth());
    return overlappingBins == null ? null : new BinList(referenceIndex, overlappingBins);
}
final int numRefs = existingIndex.getNumberOfReferences(); final BAMIndexContent content = existingIndex.getQueryResults(i);
/**
 * Extends the functionality of {@link AbstractBAMFileIndex#getFirstBinInLevel(int)},
 * which cannot be overridden due to its static nature.
 *
 * @param levelNumber zero-based level whose first bin number is requested
 * @return the number of the first bin on the level, computed as {@code (8^levelNumber - 1) / 7}
 * @throws SAMException if {@code levelNumber} is negative or is greater than or equal
 *         to {@code getBinDepth()}
 */
public int getFirstBinInLevelForCSI(final int levelNumber) {
    // A negative level would turn into a masked shift distance and yield a bogus bin number.
    if (levelNumber < 0) {
        throw new SAMException("Level number (" + levelNumber + ") is negative.");
    }
    if (levelNumber >= getBinDepth()) {
        throw new SAMException("Level number (" + levelNumber + ") is greater than or equal to maximum (" + getBinDepth() + ").");
    }
    // (8^n - 1) / 7 equals 8^0 + 8^1 + ... + 8^(n-1).
    // NOTE(review): the int shift is only exact while 3 * levelNumber <= 30; confirm that
    // bin depths above 11 are rejected elsewhere.
    return ((1 << 3 * levelNumber) - 1) / 7;
}
/**
 * Requesting the size of level 6 from {@code csi} is expected to raise a {@link SAMException}.
 */
@Test(expectedExceptions = SAMException.class)
public static void testGetLevelSizeFail1() {
    final int requestedLevel = 6;
    csi.getLevelSize(requestedLevel);
}
/**
 * Requesting the first bin of level 7 from {@code ucsi} is expected to raise a {@link SAMException}.
 */
@Test(expectedExceptions = SAMException.class)
public static void testGetFirstBinInLevelFail2() {
    final int requestedLevel = 7;
    ucsi.getFirstBinInLevelForCSI(requestedLevel);
}
/**
 * Computes the number of the bin one level above the given bin.
 *
 * @param binNumber number of the bin whose parent is requested
 * @return {@code 0} for bin 0, otherwise {@code (binNumber - 1) >> 3}
 * @throws SAMException if {@code binNumber} is greater than or equal to {@code getMaxBins()}
 */
public int getParentBinNumber(int binNumber) {
    if (binNumber >= getMaxBins()) {
        throw new SAMException("Tried to get parent bin for invalid bin (" + binNumber + ").");
    }
    // Bin 0 is reported as its own parent.
    return binNumber == 0 ? 0 : (binNumber - 1) >> 3;
}
/**
 * Opens the BAI and CSI index fixtures shared by the tests in this class.
 * {@code csi} and {@code mcsi} are built from the same file and differ only in the
 * boolean constructor argument (NOTE(review): presumably memory mapping; confirm).
 *
 * @throws RuntimeException if the uncompressed CSI index cannot be opened, so that the
 *         setup fails immediately instead of leaving {@code ucsi} null
 */
@BeforeTest
public void init() {
    bai = new DiskBasedBAMFileIndex(new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.bai"), null);
    csi = new CSIIndex(new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.csi"), false, null);
    mcsi = new CSIIndex(new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/index_test.bam.csi"), true, null);
    try {
        ucsi = new CSIIndex(Paths.get("src/test/resources/htsjdk/samtools/BAMFileIndexTest/uncompressed_index.bam.csi"), null);
    } catch (IOException e) {
        // Swallowing this would surface later as unrelated NullPointerExceptions in tests using ucsi.
        throw new RuntimeException("Unable to open uncompressed CSI index fixture", e);
    }
    ubai = new DiskBasedBAMFileIndex(new File("src/test/resources/htsjdk/samtools/BAMFileIndexTest/uncompressed_index.bam.bai"), null);
}
/**
 * Checks the minimum shift reported by the {@code csi} and {@code ucsi} fixtures.
 */
@Test
public static void testGetMinShift() {
    final int expectedCsiMinShift = 14;
    final int expectedUcsiMinShift = 12;

    Assert.assertEquals(csi.getMinShift(), expectedCsiMinShift);
    Assert.assertEquals(ucsi.getMinShift(), expectedUcsiMinShift);
}

@Test
/**
 * Checks the maximum span reported by the {@code csi} and {@code ucsi} fixtures.
 */
@Test
public static void testGetMaxSpan() {
    final int expectedCsiSpan = 1 << 29;  // 512 * 1024 * 1024
    final int expectedUcsiSpan = 1 << 30; // 1024 * 1024 * 1024

    Assert.assertEquals(csi.getMaxSpan(), expectedCsiSpan);
    Assert.assertEquals(ucsi.getMaxSpan(), expectedUcsiSpan);
}
/**
 * Checks the number of references reported by the {@code csi} fixture.
 */
@Test
public static void testGetNReferences() {
    final int expectedReferenceCount = 45;
    Assert.assertEquals(csi.getNumberOfReferences(), expectedReferenceCount);
}