/** * Provides a list of bins that contain bases at requested positions * @param referenceIndex sequence of desired SAMRecords * @param startPos 1-based start of the desired interval, inclusive * @param endPos 1-based end of the desired interval, inclusive * @return a list of bins that contain relevant data */ @Override public BinList getBinsOverlapping(int referenceIndex, int startPos, int endPos) { long refLength = recordRangeInfo.getReferenceLengthsAligned().get(referenceIndex); // convert to chunk address space within reference long refStartPos = startPos - 1; long refEndPos = endPos; if (refEndPos >= refLength) { throw new RuntimeException("refEndPos is larger than reference length"); } int firstBinNumber = (int)refStartPos / SRA_BIN_SIZE; int lastBinNumber = (int)(refEndPos - 1) / SRA_BIN_SIZE; int numberOfBins = ((int)refLength / SRA_BIN_SIZE) + 1; BitSet binBitSet = new BitSet(); binBitSet.set(0, SRA_BIN_INDEX_OFFSET, false); if (firstBinNumber > 0) { binBitSet.set(SRA_BIN_INDEX_OFFSET, SRA_BIN_INDEX_OFFSET + firstBinNumber, false); } binBitSet.set(SRA_BIN_INDEX_OFFSET + firstBinNumber, SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, true); if (lastBinNumber + 1 < numberOfBins) { binBitSet.set(SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, SRA_BIN_INDEX_OFFSET + numberOfBins, false); } return new BinList(referenceIndex, binBitSet); }
/**
 * Creates a file span consisting of a single chunk that runs from {@code start} to {@code end}
 * of the given sequence, both shifted by that sequence's offset.
 *
 * @param sequenceIndex index used to look up the sequence offset and aligned length
 * @param start start position within the sequence
 * @param end end position within the sequence; {@code -1} is replaced by the aligned sequence length
 * @return the span covering the requested part of the sequence
 * @throws RuntimeException if the total record range length is not positive
 * @throws IllegalArgumentException if {@code start} or the resolved end exceeds the aligned sequence length
 */
private BAMFileSpan getSpanContained(int sequenceIndex, long start, long end) {
    if (recordRangeInfo.getTotalRecordRangeLength() <= 0) {
        throw new RuntimeException("Cannot create file span - SRA file is empty");
    }

    final long sequenceOffset = recordRangeInfo.getReferenceOffsets().get(sequenceIndex);
    final long sequenceLength = recordRangeInfo.getReferenceLengthsAligned().get(sequenceIndex);

    // -1 is the sentinel for "up to the end of the sequence".
    final long resolvedEnd = (end == -1) ? sequenceLength : end;

    if (start > sequenceLength) {
        throw new IllegalArgumentException("Sequence start position is larger than its length");
    }
    if (resolvedEnd > sequenceLength) {
        throw new IllegalArgumentException("Sequence end position is larger than its length");
    }

    final Chunk sequenceChunk = new Chunk(sequenceOffset + start, sequenceOffset + resolvedEnd);
    return new BAMFileSpan(sequenceChunk);
}
}
while (currentChunk != null) { if (alignmentIterator == null) { if (currentChunk.getChunkStart() < recordRangeInfo.getTotalReferencesLength()) { alignmentIterator = new SRAAlignmentIterator(accession, run, header, cachedReferences, recordRangeInfo, currentChunk); if (validationStringency != null) { if (currentChunk.getChunkEnd() > recordRangeInfo.getTotalReferencesLength()) { unalignmentIterator = new SRAUnalignmentIterator(accession, run, header, recordRangeInfo, currentChunk); if (validationStringency != null) {
while (currentChunk != null) { if (alignmentIterator == null) { if (currentChunk.getChunkStart() < recordRangeInfo.getTotalReferencesLength()) { alignmentIterator = new SRAAlignmentIterator(accession, run, header, cachedReferences, recordRangeInfo, currentChunk); if (validationStringency != null) { if (currentChunk.getChunkEnd() > recordRangeInfo.getTotalReferencesLength()) { unalignmentIterator = new SRAUnalignmentIterator(accession, run, header, recordRangeInfo, currentChunk); if (validationStringency != null) {
this.recordRangeInfo = recordRangeInfo; long readStart = chunk.getChunkStart() - recordRangeInfo.getTotalReferencesLength(); if (readStart < 0) { readStart = 0; } else if (readStart >= recordRangeInfo.getNumberOfReads()) { throw new RuntimeException("Invalid chunk provided: chunkStart position is after last read"); long readEnd = chunk.getChunkEnd() - recordRangeInfo.getTotalReferencesLength(); if (readEnd > recordRangeInfo.getNumberOfReads()) { readEnd = recordRangeInfo.getNumberOfReads(); } else if (readEnd <= 0) { throw new RuntimeException("Invalid chunk provided: chunkEnd position is before last read");
this.recordRangeInfo = recordRangeInfo; long readStart = chunk.getChunkStart() - recordRangeInfo.getTotalReferencesLength(); if (readStart < 0) { readStart = 0; } else if (readStart >= recordRangeInfo.getNumberOfReads()) { throw new RuntimeException("Invalid chunk provided: chunkStart position is after last read"); long readEnd = chunk.getChunkEnd() - recordRangeInfo.getTotalReferencesLength(); if (readEnd > recordRangeInfo.getNumberOfReads()) { readEnd = recordRangeInfo.getNumberOfReads(); } else if (readEnd <= 0) { throw new RuntimeException("Invalid chunk provided: chunkEnd position is before last read");
/** * Provides a list of bins that contain bases at requested positions * @param referenceIndex sequence of desired SAMRecords * @param startPos 1-based start of the desired interval, inclusive * @param endPos 1-based end of the desired interval, inclusive * @return a list of bins that contain relevant data */ @Override public BinList getBinsOverlapping(int referenceIndex, int startPos, int endPos) { long refLength = recordRangeInfo.getReferenceLengthsAligned().get(referenceIndex); // convert to chunk address space within reference long refStartPos = startPos - 1; long refEndPos = endPos; if (refEndPos >= refLength) { throw new RuntimeException("refEndPos is larger than reference length"); } int firstBinNumber = (int)refStartPos / SRA_BIN_SIZE; int lastBinNumber = (int)(refEndPos - 1) / SRA_BIN_SIZE; int numberOfBins = ((int)refLength / SRA_BIN_SIZE) + 1; BitSet binBitSet = new BitSet(); binBitSet.set(0, SRA_BIN_INDEX_OFFSET, false); if (firstBinNumber > 0) { binBitSet.set(SRA_BIN_INDEX_OFFSET, SRA_BIN_INDEX_OFFSET + firstBinNumber, false); } binBitSet.set(SRA_BIN_INDEX_OFFSET + firstBinNumber, SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, true); if (lastBinNumber + 1 < numberOfBins) { binBitSet.set(SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, SRA_BIN_INDEX_OFFSET + numberOfBins, false); } return new BinList(referenceIndex, binBitSet); }
/** * @param bin Requested bin * @return chunks that represent all bases of requested bin */ private List<Chunk> getBinChunks(Bin bin) { if (bin.containsChunks()) { return bin.getChunkList(); } if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) { throw new RuntimeException("SRA only supports bins from the last level"); } int binNumber = bin.getBinNumber() - SRA_BIN_INDEX_OFFSET; long refOffset = recordRangeInfo.getReferenceOffsets().get(bin.getReferenceSequence()); // move requested position MAX_FRAGMENT_OVERLAP bases behind, so that we take all the reads that overlap requested position int firstChunkCorrection = binNumber == 0 ? 0 : -MAX_FRAGMENT_OVERLAP; long binGlobalOffset = binNumber * SRA_BIN_SIZE + refOffset; long firstChunkNumber = (binGlobalOffset + firstChunkCorrection) / SRA_CHUNK_SIZE; long lastChunkNumber = (binGlobalOffset + SRA_BIN_SIZE - 1) / SRA_CHUNK_SIZE; List<Chunk> chunks = new ArrayList<Chunk>(); for (long chunkNumber = firstChunkNumber; chunkNumber <= lastChunkNumber; chunkNumber++) { chunks.add(new Chunk(chunkNumber * SRA_CHUNK_SIZE, (chunkNumber + 1) * SRA_CHUNK_SIZE)); } return chunks; } }
/** * @param bin Requested bin * @return chunks that represent all bases of requested bin */ private List<Chunk> getBinChunks(Bin bin) { if (bin.containsChunks()) { return bin.getChunkList(); } if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) { throw new RuntimeException("SRA only supports bins from the last level"); } int binNumber = bin.getBinNumber() - SRA_BIN_INDEX_OFFSET; long refOffset = recordRangeInfo.getReferenceOffsets().get(bin.getReferenceSequence()); // move requested position MAX_FRAGMENT_OVERLAP bases behind, so that we take all the reads that overlap requested position int firstChunkCorrection = binNumber == 0 ? 0 : -MAX_FRAGMENT_OVERLAP; long binGlobalOffset = binNumber * SRA_BIN_SIZE + refOffset; long firstChunkNumber = (binGlobalOffset + firstChunkCorrection) / SRA_CHUNK_SIZE; long lastChunkNumber = (binGlobalOffset + SRA_BIN_SIZE - 1) / SRA_CHUNK_SIZE; List<Chunk> chunks = new ArrayList<Chunk>(); for (long chunkNumber = firstChunkNumber; chunkNumber <= lastChunkNumber; chunkNumber++) { chunks.add(new Chunk(chunkNumber * SRA_CHUNK_SIZE, (chunkNumber + 1) * SRA_CHUNK_SIZE)); } return chunks; } }
/**
 * Stores the supplied collaborators, derives the reference chunks selected by {@code chunk}
 * and advances to the first reference.
 *
 * @param accession SRA accession the iterator is created for
 * @param run opened read collection
 * @param header sam header
 * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader
 * @param recordRangeInfo info about record ranges within SRA archive
 * @param chunk used to determine which alignments the iterator should return
 * @throws RuntimeException wrapping any exception raised while moving to the first reference
 */
public SRAAlignmentIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, ReferenceCache cachedReferences, final SRAIterator.RecordRangeInfo recordRangeInfo, final Chunk chunk) {
    // Plain field wiring; the reference lengths come from the record range info.
    this.referencesLengths = recordRangeInfo.getReferenceLengthsAligned();
    this.cachedReferences = cachedReferences;
    this.header = header;
    this.run = run;
    this.accession = accession;

    // Must run after the fields above are populated.
    referencesChunksIterator = getReferenceChunks(chunk).iterator();

    try {
        nextReference();
    } catch (final Exception failure) {
        throw new RuntimeException(failure);
    }
}
/**
 * Stores the supplied collaborators, derives the reference chunks selected by {@code chunk}
 * and advances to the first reference.
 *
 * @param accession SRA accession the iterator is created for
 * @param run opened read collection
 * @param header sam header
 * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader
 * @param recordRangeInfo info about record ranges within SRA archive
 * @param chunk used to determine which alignments the iterator should return
 * @throws RuntimeException wrapping any exception raised while moving to the first reference
 */
public SRAAlignmentIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, ReferenceCache cachedReferences, final SRAIterator.RecordRangeInfo recordRangeInfo, final Chunk chunk) {
    // Plain field wiring; the reference lengths come from the record range info.
    this.referencesLengths = recordRangeInfo.getReferenceLengthsAligned();
    this.cachedReferences = cachedReferences;
    this.header = header;
    this.run = run;
    this.accession = accession;

    // Must run after the fields above are populated.
    referencesChunksIterator = getReferenceChunks(chunk).iterator();

    try {
        nextReference();
    } catch (final Exception failure) {
        throw new RuntimeException(failure);
    }
}
/**
 * Creates a file span consisting of a single chunk that runs from {@code start} to {@code end}
 * of the given sequence, both shifted by that sequence's offset.
 *
 * @param sequenceIndex index used to look up the sequence offset and aligned length
 * @param start start position within the sequence
 * @param end end position within the sequence; {@code -1} is replaced by the aligned sequence length
 * @return the span covering the requested part of the sequence
 * @throws RuntimeException if the total record range length is not positive
 * @throws IllegalArgumentException if {@code start} or the resolved end exceeds the aligned sequence length
 */
private BAMFileSpan getSpanContained(int sequenceIndex, long start, long end) {
    if (recordRangeInfo.getTotalRecordRangeLength() <= 0) {
        throw new RuntimeException("Cannot create file span - SRA file is empty");
    }

    final long sequenceOffset = recordRangeInfo.getReferenceOffsets().get(sequenceIndex);
    final long sequenceLength = recordRangeInfo.getReferenceLengthsAligned().get(sequenceIndex);

    // -1 is the sentinel for "up to the end of the sequence".
    final long resolvedEnd = (end == -1) ? sequenceLength : end;

    if (start > sequenceLength) {
        throw new IllegalArgumentException("Sequence start position is larger than its length");
    }
    if (resolvedEnd > sequenceLength) {
        throw new IllegalArgumentException("Sequence end position is larger than its length");
    }

    final Chunk sequenceChunk = new Chunk(sequenceOffset + start, sequenceOffset + resolvedEnd);
    return new BAMFileSpan(sequenceChunk);
}
}
/**
 * Creates a span made of one chunk that starts at 0 and ends at the total record range length.
 *
 * @return the span described above
 * @throws RuntimeException if the total record range length is not positive
 */
@Override
public SAMFileSpan getFilePointerSpanningReads() {
    final long totalRecordRangeLength = recordRangeInfo.getTotalRecordRangeLength();
    if (totalRecordRangeLength > 0) {
        return new BAMFileSpan(new Chunk(0, totalRecordRangeLength));
    }
    throw new RuntimeException("Cannot create file span - SRA file is empty");
}
/**
 * Computes, for the last reference, its offset plus its aligned length rounded down to a
 * whole number of {@code SRA_CHUNK_SIZE} units.
 *
 * @return a position where aligned fragments end
 */
@Override
public long getStartOfLastLinearBin() {
    final int lastReferenceIndex = recordRangeInfo.getReferenceLengthsAligned().size() - 1;

    final long lastReferenceOffset = recordRangeInfo.getReferenceOffsets().get(lastReferenceIndex);
    // Integer division drops any partial chunk at the end of the reference.
    final long wholeChunks = recordRangeInfo.getReferenceLengthsAligned().get(lastReferenceIndex) / SRA_CHUNK_SIZE;

    return lastReferenceOffset + wholeChunks * SRA_CHUNK_SIZE;
}
/**
 * Iterates over the part of the record range that lies after all references, i.e. the chunk
 * from the total references length up to the total record range length.
 *
 * @return an iterator obtained from {@code getIterator} for that span
 * @throws RuntimeException if the total record range length is not positive
 */
@Override
public CloseableIterator<SAMRecord> queryUnmapped() {
    final long totalRecordRangeLength = recordRangeInfo.getTotalRecordRangeLength();
    if (totalRecordRangeLength <= 0) {
        throw new RuntimeException("Cannot create file span - SRA file is empty");
    }

    final long referencesEnd = recordRangeInfo.getTotalReferencesLength();
    final Chunk afterReferences = new Chunk(referencesEnd, totalRecordRangeLength);
    final SAMFileSpan unmappedSpan = new BAMFileSpan(afterReferences);
    return getIterator(unmappedSpan);
}
/**
 * Loads record ranges needed for emulating BAM index.
 * <p>
 * The result is built from the aligned reference lengths and the number of reads that
 * {@code SRAUtils} reports for the read collection.
 *
 * @param run read collection
 * @return record ranges
 * @throws RuntimeException wrapping the {@code ErrorMsg} raised while querying the read collection
 */
public static RecordRangeInfo getRecordsRangeInfo(ReadCollection run) {
    try {
        return new RecordRangeInfo(
                SRAUtils.getReferencesLengthsAligned(run),
                SRAUtils.getNumberOfReads(run));
    } catch (ErrorMsg ngsFailure) {
        throw new RuntimeException(ngsFailure);
    }
}
/**
 * Creates a span made of one chunk that starts at 0 and ends at the total record range length.
 *
 * @return the span described above
 * @throws RuntimeException if the total record range length is not positive
 */
@Override
public SAMFileSpan getFilePointerSpanningReads() {
    final long totalRecordRangeLength = recordRangeInfo.getTotalRecordRangeLength();
    if (totalRecordRangeLength > 0) {
        return new BAMFileSpan(new Chunk(0, totalRecordRangeLength));
    }
    throw new RuntimeException("Cannot create file span - SRA file is empty");
}
/**
 * Loads record ranges needed for emulating BAM index.
 * <p>
 * The result is built from the aligned reference lengths and the number of reads that
 * {@code SRAUtils} reports for the read collection.
 *
 * @param run read collection
 * @return record ranges
 * @throws RuntimeException wrapping the {@code ErrorMsg} raised while querying the read collection
 */
public static RecordRangeInfo getRecordsRangeInfo(ReadCollection run) {
    try {
        return new RecordRangeInfo(
                SRAUtils.getReferencesLengthsAligned(run),
                SRAUtils.getNumberOfReads(run));
    } catch (ErrorMsg ngsFailure) {
        throw new RuntimeException(ngsFailure);
    }
}
/**
 * Iterates over the part of the record range that lies after all references, i.e. the chunk
 * from the total references length up to the total record range length.
 *
 * @return an iterator obtained from {@code getIterator} for that span
 * @throws RuntimeException if the total record range length is not positive
 */
@Override
public CloseableIterator<SAMRecord> queryUnmapped() {
    final long totalRecordRangeLength = recordRangeInfo.getTotalRecordRangeLength();
    if (totalRecordRangeLength <= 0) {
        throw new RuntimeException("Cannot create file span - SRA file is empty");
    }

    final long referencesEnd = recordRangeInfo.getTotalReferencesLength();
    final Chunk afterReferences = new Chunk(referencesEnd, totalRecordRangeLength);
    final SAMFileSpan unmappedSpan = new BAMFileSpan(afterReferences);
    return getIterator(unmappedSpan);
}
/**
 * Computes, for the last reference, its offset plus its aligned length rounded down to a
 * whole number of {@code SRA_CHUNK_SIZE} units.
 *
 * @return a position where aligned fragments end
 */
@Override
public long getStartOfLastLinearBin() {
    final int lastReferenceIndex = recordRangeInfo.getReferenceLengthsAligned().size() - 1;

    final long lastReferenceOffset = recordRangeInfo.getReferenceOffsets().get(lastReferenceIndex);
    // Integer division drops any partial chunk at the end of the reference.
    final long wholeChunks = recordRangeInfo.getReferenceLengthsAligned().get(lastReferenceIndex) / SRA_CHUNK_SIZE;

    return lastReferenceOffset + wholeChunks * SRA_CHUNK_SIZE;
}