/**
 * Create a new GATKBAMFileSpan from an existing BAMFileSpan.
 * @param sourceFileSpan span to copy; must actually be a BAMFileSpan
 */
public GATKBAMFileSpan(SAMFileSpan sourceFileSpan) {
    if (!(sourceFileSpan instanceof BAMFileSpan)) {
        throw new SAMException("Unable to create GATKBAMFileSpan from a SAMFileSpan. Please submit a BAMFileSpan instead");
    }
    final BAMFileSpan bamFileSpan = (BAMFileSpan) sourceFileSpan;
    for (final Chunk chunk : bamFileSpan.getChunks()) {
        // Chunks that are already GATKChunks are reused; plain Chunks are wrapped.
        if (chunk instanceof GATKChunk) {
            add(chunk);
        } else {
            add(new GATKChunk(chunk));
        }
    }
}
/**
 * Records a validation error if the record has no read group, or if its read
 * group is not declared in the file header.
 */
private void validateReadGroup(final SAMRecord record, final SAMFileHeader header) {
    final SAMReadGroupRecord readGroup = record.getReadGroup();
    if (readGroup == null) {
        addError(new SAMValidationError(Type.RECORD_MISSING_READ_GROUP,
                "A record is missing a read group", record.getReadName()));
        return;
    }
    if (header.getReadGroup(readGroup.getId()) == null) {
        addError(new SAMValidationError(Type.READ_GROUP_NOT_FOUND,
                "A record has a read group not found in the header: ",
                record.getReadName() + ", " + readGroup.getReadGroupId()));
    }
}
/** * reinitialize all data structures when the reference changes */ void startNewReference() { ++currentReference; // I'm not crazy about recycling this object, but that is the way it was originally written and // it helps keep track of no-coordinate read count (which shouldn't be stored in this class anyway). indexStats.newReference(); binningIndexBuilder = new BinningIndexBuilder(currentReference, sequenceDictionary.getSequence(currentReference).getSequenceLength()); } }
/**
 * Counts the total number of records in an indexed SAM/BAM file by summing the
 * aligned and unaligned record counts from the index metadata of every reference.
 *
 * NOTE(review): records with no coordinate may not appear in per-reference index
 * metadata — confirm against the index format if those must be counted.
 *
 * @param samFile an indexed SAM/BAM file
 * @return total record count derived from the index metadata
 */
public static long countSamTotalRecord(final File samFile) {
    final SamReader reader = SamReaderFactory.make().open(samFile);
    try {
        assert reader.hasIndex();
        long total = 0;
        // Hoisted: the reference count is loop-invariant.
        final int numReferences = reader.getFileHeader().getSequenceDictionary().size();
        for (int i = 0; i < numReferences; i++) {
            total += reader.indexing().getIndex().getMetaData(i).getAlignedRecordCount();
            total += reader.indexing().getIndex().getMetaData(i).getUnalignedRecordCount();
        }
        return total;
    } finally {
        // Fix: the original leaked the reader. Close is best-effort so a failure
        // to close does not mask the computed result or a pending exception.
        try {
            reader.close();
        } catch (final java.io.IOException ignored) {
            // intentionally ignored: counting already completed (or an earlier exception is propagating)
        }
    }
}
}
/**
 * Verifies that the sort order declared in {@code newHeader} is compatible with the
 * sort order of the existing file: either {@code unsorted} or identical to the
 * original file's order.
 *
 * @param newHeader the replacement header being validated
 * @param inputFile the existing SAM/BAM file whose header is authoritative
 * @throws SAMException if the new sort order conflicts with the original file's
 * @throws IOException if the reader cannot be opened or closed
 */
private static void assertSortOrdersAreEqual(final SAMFileHeader newHeader, final File inputFile) throws IOException {
    // Fix: try-with-resources — the original leaked the reader when the
    // SAMException was thrown before reaching close().
    try (final SamReader reader = SamReaderFactory.makeDefault().open(inputFile)) {
        final SAMFileHeader origHeader = reader.getFileHeader();
        final SAMFileHeader.SortOrder newSortOrder = newHeader.getSortOrder();
        if (newSortOrder != SAMFileHeader.SortOrder.unsorted
                && newSortOrder != origHeader.getSortOrder()) {
            throw new SAMException("Sort order of new header does not match the original file, needs to be "
                    + origHeader.getSortOrder());
        }
    }
}
}
/**
 * Returns true if the read's CIGAR contains at least one N (skipped-region)
 * operator; false if it contains none or the read has no CIGAR.
 */
private static boolean containsNOperator(final SAMRecord read) {
    final Cigar cigar = read.getCigar();
    if (cigar == null) {
        return false;
    }
    return cigar.getCigarElements().stream()
            .anyMatch(element -> element.getOperator() == CigarOperator.N);
}
/**
 * Prepare to index a BAM.
 * @param output Index will be written here. output will be closed when finish() method is called.
 * @param fileHeader header for the corresponding bam file.
 */
public BAMIndexer(final OutputStream output, final SAMFileHeader fileHeader) {
    this.numReferences = fileHeader.getSequenceDictionary().size();
    this.indexBuilder = new BAMIndexBuilder(fileHeader.getSequenceDictionary());
    this.outputWriter = new BinaryBAMIndexWriter(this.numReferences, output);
}
/**
 * Create a CRAM indexer that writes BAI to a stream.
 *
 * @param output Index will be written here. output will be closed when finish() method is called.
 * @param fileHeader header for the corresponding bam file.
 */
public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) {
    this.numReferences = fileHeader.getSequenceDictionary().size();
    this.indexBuilder = new BAMIndexBuilder(fileHeader);
    this.outputWriter = new BinaryBAMIndexWriter(this.numReferences, output);
}
/**
 * Creates an index shard writer for a single reference.
 * Only the first shard (reference 0) writes the full reference list in its
 * header; every later shard writes a header with zero references.
 */
public BAMShardIndexer(OutputStream output, SAMFileHeader header, int reference) {
    this.referenceIndex = reference;
    indexBuilder = new BAMIndexBuilder(header.getSequenceDictionary(), reference);
    outputWriter = new BinaryBAMShardIndexWriter(
            reference == 0 ? header.getSequenceDictionary().size() : 0,
            output);
}
/**
 * Parses an @PG header line into a SAMProgramRecord and adds it to the header.
 * Lines missing the mandatory program-group ID tag are reported by requireTag
 * and skipped.
 */
private void parsePGLine(final ParsedHeaderLine parsedHeaderLine) {
    assert (HeaderRecordType.PG.equals(parsedHeaderLine.getHeaderRecordType()));
    if (!parsedHeaderLine.requireTag(SAMProgramRecord.PROGRAM_GROUP_ID_TAG)) {
        return;
    }
    final String programGroupId = parsedHeaderLine.removeValue(SAMProgramRecord.PROGRAM_GROUP_ID_TAG);
    final SAMProgramRecord programRecord = new SAMProgramRecord(programGroupId);
    transferAttributes(programRecord, parsedHeaderLine.mKeyValuePairs);
    mFileHeader.addProgramRecord(programRecord);
}
private CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] intervals, final boolean contained) { assertIntervalsOptimized(intervals); BAMFileSpan span = getFileSpan(intervals, getIndex()); // Create an iterator over the above chunk boundaries. final BAMFileIndexIterator iterator = new BAMFileIndexIterator(span == null ? null : span.toCoordinateArray()); // Add some preprocessing filters for edge-case reads that don't fit into this // query type. return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); }
@Override
public Chunk getChunk() {
    // The record must carry its originating file source (virtual file offsets)
    // for indexing to locate it in the BAM; records without one cannot be indexed.
    final SAMFileSource source = rec.getFileSource();
    if (source == null) {
        throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec);
    }
    // The file pointer of a single BAM record is a span with exactly one chunk.
    return ((BAMFileSpan) source.getFilePointer()).getSingleChunk();
}
});
/**
 * Feeds one record's starting virtual file offset to the SBI index writer.
 * Fails if the record does not carry its originating file source.
 */
private void processAlignment(final SBIIndexWriter indexWriter, final SAMRecord rec) {
    final SAMFileSource fileSource = rec.getFileSource();
    if (fileSource == null) {
        throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec);
    }
    indexWriter.processRecord(((BAMFileSpan) fileSource.getFilePointer()).getFirstOffset());
}
/**
 * Queries for records whose alignment starts exactly at the given position on
 * the named sequence (end of -1 per the QueryInterval convention used here).
 */
@Override
public CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
    final int referenceIndex = getFileHeader().getSequenceIndex(sequence);
    final QueryInterval[] intervals = {new QueryInterval(referenceIndex, start, -1)};
    return new CRAMIntervalIterator(intervals, true);
}
/**
 * Flags a record that lacks a read group, or whose read group is absent from
 * the file header, as a validation error.
 */
private void validateReadGroup(final SAMRecord record, final SAMFileHeader header) {
    final SAMReadGroupRecord readGroup = record.getReadGroup();
    if (readGroup == null) {
        addError(new SAMValidationError(Type.RECORD_MISSING_READ_GROUP,
                "A record is missing a read group", record.getReadName()));
        return;
    }
    if (header.getReadGroup(readGroup.getId()) == null) {
        addError(new SAMValidationError(Type.READ_GROUP_NOT_FOUND,
                "A record has a read group not found in the header: ",
                record.getReadName() + ", " + readGroup.getReadGroupId()));
    }
}
/** * reinitialize all data structures when the reference changes */ void startNewReference() { ++currentReference; // I'm not crazy about recycling this object, but that is the way it was originally written and // it helps keep track of no-coordinate read count (which shouldn't be stored in this class anyway). indexStats.newReference(); binningIndexBuilder = new BinningIndexBuilder(currentReference, sequenceDictionary.getSequence(currentReference).getSequenceLength()); } }
/**
 * Create a CRAM indexer that writes BAI to a stream.
 *
 * @param output Index will be written here. output will be closed when finish() method is called.
 * @param fileHeader header for the corresponding bam file.
 */
public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) {
    this.numReferences = fileHeader.getSequenceDictionary().size();
    this.indexBuilder = new BAMIndexBuilder(fileHeader);
    this.outputWriter = new BinaryBAMIndexWriter(this.numReferences, output);
}
/**
 * Parses an @PG header line into a SAMProgramRecord and registers it with the
 * header. Lines lacking the mandatory program-group ID tag are skipped after
 * requireTag reports them.
 */
private void parsePGLine(final ParsedHeaderLine parsedHeaderLine) {
    assert (HeaderRecordType.PG.equals(parsedHeaderLine.getHeaderRecordType()));
    if (!parsedHeaderLine.requireTag(SAMProgramRecord.PROGRAM_GROUP_ID_TAG)) {
        return;
    }
    final String programGroupId = parsedHeaderLine.removeValue(SAMProgramRecord.PROGRAM_GROUP_ID_TAG);
    final SAMProgramRecord programRecord = new SAMProgramRecord(programGroupId);
    transferAttributes(programRecord, parsedHeaderLine.mKeyValuePairs);
    mFileHeader.addProgramRecord(programRecord);
}
@Override
public Chunk getChunk() {
    // Indexing needs the record's originating file source (virtual file offsets);
    // a record without one cannot be located in the BAM and is an error.
    final SAMFileSource source = rec.getFileSource();
    if (source == null) {
        throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec);
    }
    // A single record's file pointer is a span containing exactly one chunk.
    return ((BAMFileSpan) source.getFilePointer()).getSingleChunk();
}
});
/**
 * Reports a validation error when a record is missing its read group, or when
 * the read group it names is not declared in the header.
 */
private void validateReadGroup(final SAMRecord record, final SAMFileHeader header) {
    final SAMReadGroupRecord readGroup = record.getReadGroup();
    if (readGroup == null) {
        addError(new SAMValidationError(Type.RECORD_MISSING_READ_GROUP,
                "A record is missing a read group", record.getReadName()));
        return;
    }
    if (header.getReadGroup(readGroup.getId()) == null) {
        addError(new SAMValidationError(Type.READ_GROUP_NOT_FOUND,
                "A record has a read group not found in the header: ",
                record.getReadName() + ", " + readGroup.getReadGroupId()));
    }
}