/** * Does read start at the same position as described by currentContextIndex and currentAlignmentStart? * * @param read the read we want to test * @param currentContigIndex the contig index (from the read's getReferenceIndex) of the reads in this state manager * @param currentAlignmentStart the alignment start of the of the left-most position on the * genome of the reads in this read state manager * @return true if read has contig index and start equal to the current ones */ private boolean readStartsAtCurrentPosition(final GATKSAMRecord read, final int currentContigIndex, final int currentAlignmentStart) { return read.getAlignmentStart() == currentAlignmentStart && read.getReferenceIndex() == currentContigIndex; }
/** * Get the reference index of the underlying read * * @return the reference index of the read */ @Ensures("result == getRead().getReferenceIndex()") public int getReferenceIndex() { return getRead().getReferenceIndex(); }
/** * Add a read to the manager * * @param read the read to add */ public void addRead(final GATKSAMRecord read) { if ( read == null ) throw new IllegalArgumentException("read added to manager is null, which is not allowed"); // if the new read is on a different contig or we have too many reads, then we need to flush the queue and clear the map final boolean tooManyReads = getNReadsInQueue() >= MAX_RECORDS_IN_MEMORY; final boolean encounteredNewContig = getNReadsInQueue() > 0 && !waitingReads.peek().read.getReferenceIndex().equals(read.getReferenceIndex()); if ( tooManyReads || encounteredNewContig ) { if ( DEBUG ) logger.warn("Flushing queue on " + (tooManyReads ? "too many reads" : ("move to new contig: " + read.getReferenceName() + " from " + waitingReads.peek().read.getReferenceName())) + " at " + read.getAlignmentStart()); final int targetQueueSize = encounteredNewContig ? 0 : MAX_RECORDS_IN_MEMORY / 2; // write the required number of waiting reads to disk while ( getNReadsInQueue() > targetQueueSize ) writer.addAlignment(waitingReads.poll().read); } final SplitRead splitRead = new SplitRead(read); // fix overhangs, as needed for ( final Splice splice : splices) fixSplit(splitRead, splice); // add the new read to the queue waitingReads.add(splitRead); }
/** * Add read to this active region * * Read must have alignment start >= than the last read currently in this active region. * * @throws IllegalArgumentException if read doesn't overlap the extended region of this active region * * @param read a non-null GATKSAMRecord */ @Ensures("reads.size() == old(reads.size()) + 1") public void add( final GATKSAMRecord read ) { if ( read == null ) throw new IllegalArgumentException("Read cannot be null"); final GenomeLoc readLoc = genomeLocParser.createGenomeLoc( read ); if ( ! readOverlapsRegion(read) ) throw new IllegalArgumentException("Read location " + readLoc + " doesn't overlap with active region extended span " + extendedLoc); spanIncludingReads = spanIncludingReads.union( readLoc ); if ( ! reads.isEmpty() ) { final GATKSAMRecord lastRead = reads.get(size() - 1); if ( ! lastRead.getReferenceIndex().equals(read.getReferenceIndex()) ) throw new IllegalArgumentException("Attempting to add a read to ActiveRegion not on the same contig as other reads: lastRead " + lastRead + " attempting to add " + read); if ( read.getAlignmentStart() < lastRead.getAlignmentStart() ) throw new IllegalArgumentException("Attempting to add a read to ActiveRegion out of order w.r.t. other reads: lastRead " + lastRead + " at " + lastRead.getAlignmentStart() + " attempting to add " + read + " at " + read.getAlignmentStart()); } reads.add( read ); }
private static boolean unclippedReadOverlapsRegion(final GATKSAMRecord sampleRead, final int referenceIndex, final int start, final int end) { final int readReference = sampleRead.getReferenceIndex(); if (readReference != referenceIndex) return false; final int readStart = sampleRead.getUnclippedStart(); if (readStart > end) return false; final int readEnd = sampleRead.getReadUnmappedFlag() ? sampleRead.getUnclippedEnd() : Math.max(sampleRead.getUnclippedEnd(), sampleRead.getUnclippedStart()); return readEnd >= start; }
/** * Is the read dead? That is, can it no longer be in any future active region, and therefore can be discarded? * * read: start |--------> stop ------ stop + extension * region: start |-----------------| end * * Since the regions are coming in order, read could potentially be contained in a future interval if * stop + activeRegionExtension >= end. If, on the other hand, stop + extension is < the end * of this region, then we can discard it, since any future region could only include reads * up to end + 1 - extension. * * Note that this function doesn't care about the dead zone. We're assuming that by * actually calling this function with an active region that region is already in the dead zone, * so checking that the read is in the dead zone doesn't make sense. * * @param read the read we're testing * @param activeRegion the current active region * @return true if the read is dead, false other */ @Requires({"read != null", "activeRegion != null"}) private boolean readCannotOccurInAnyMoreActiveRegions(final GATKSAMRecord read, final ActiveRegion activeRegion) { return read.getReferenceIndex() < activeRegion.getLocation().getContigIndex() || ( read.getReferenceIndex() == activeRegion.getLocation().getContigIndex() && read.getAlignmentEnd() + getActiveRegionExtension() < activeRegion.getLocation().getStop() ); }
/** * Did read appear in the last shard? * * When we transition across shard boundaries we see duplicate reads because * each shard contains the reads that *overlap* the shard. So if we just finished * shard 1-1000 and are now in 1001-2000 we'll see duplicate reads from 1001 * that overlapped 1-1000. This function tests read to determine if we would have * seen it before by asking if read.getAlignmentStart() is less than the * stop position of the last seen read at the start of the traversal. The reason * we need to use the location of the last read at the start of the traversal * is that we update the lastRead during the traversal, and we only want to filter * out reads whose start is before the last read of the previous shard, not the * current shard. * * @param locOfLastReadAtTraversalStart the location of the last read seen at the start of the traversal * @param read the read we want to test if it's already been seen in the last shard * @return true if read would have appeared in the last shard, false otherwise */ @Requires({"read != null"}) private boolean appearedInLastShard(final GenomeLoc locOfLastReadAtTraversalStart, final GATKSAMRecord read) { if ( locOfLastReadAtTraversalStart == null ) // we're in the first shard, so obviously the answer is no return false; else { // otherwise check to see if the alignment occurred in the previous shard return read.getAlignmentStart() <= locOfLastReadAtTraversalStart.getStart() // we're on the same contig && read.getReferenceIndex() == locOfLastReadAtTraversalStart.getContigIndex(); } }
if ( clippedFirstRead.getAlignmentEnd() < clippedSecondRead.getAlignmentStart() || !clippedFirstRead.getReferenceIndex().equals(clippedSecondRead.getReferenceIndex()) ) return;
@Test( ) public void testCreationFromSAMRecord() { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5); final GenomeLoc loc = genomeLocParser.createGenomeLoc(read); Assert.assertEquals(loc.getContig(), read.getReferenceName()); Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex()); Assert.assertEquals(loc.getStart(), read.getAlignmentStart()); Assert.assertEquals(loc.getStop(), read.getAlignmentEnd()); }
@Test( ) public void testCreationFromSAMRecordUnmappedButOnGenome() { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5); read.setReadUnmappedFlag(true); read.setCigarString("*"); final GenomeLoc loc = genomeLocParser.createGenomeLoc(read); Assert.assertEquals(loc.getContig(), read.getReferenceName()); Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex()); Assert.assertEquals(loc.getStart(), read.getAlignmentStart()); Assert.assertEquals(loc.getStop(), read.getAlignmentStart()); }
/** * Tests the GenomeLoc variable in the ReadBin after adding arbitrary reads * * @param cigarString the read's cigar string * @param alignmentStart the read's alignment start */ @Test(enabled = true, dataProvider = "reads") public void testAddingReads(String cigarString, int alignmentStart) { final GATKSAMRecord read = createReadAndAddToBin(cigarString, alignmentStart); final GenomeLoc readLoc = parser.createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getSoftStart(), Math.max(read.getSoftStart(), read.getSoftEnd())); Assert.assertEquals(readBin.getLocation(), readLoc); readBin.clear(); }
if ( read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ) { cleanAndCallMap(ref, read, metaDataTracker, null); return 0;
this.with_itself_and_mate_mapped++; if (!read.getReferenceIndex().equals(read.getMateReferenceIndex())) { this.with_mate_mapped_to_a_different_chr++;
/** * Creates a hard-clipped view on a existing read record. * @param read the underlying unclipped read. * @param start inclusive first position in {@code read} included in the clipped view. * @param end inclusive last position in {@code read} included in the clipped view. */ public ClippedGATKSAMRecord(final GATKSAMRecord read, int start, int end) { super(read.getHeader()); this.setReferenceIndex(read.getReferenceIndex()); this.setAlignmentStart(read.getAlignmentStart() + start); this.setMappingQuality(100); // setting read indexing bin below this.setFlags(read.getFlags()); this.setMateReferenceIndex(read.getMateReferenceIndex()); this.setMateAlignmentStart(read.getMateAlignmentStart()); this.setInferredInsertSize(read.getInferredInsertSize()); this.setReadBases(Arrays.copyOfRange(read.getReadBases(), start, end)); this.setBaseQualities(Arrays.copyOfRange(read.getBaseQualities(),start,end)); this.setReadName(read.getReadName()); insertionQuals = Arrays.copyOfRange(read.getBaseInsertionQualities(),start,end); deletionQuals = Arrays.copyOfRange(read.getBaseDeletionQualities(),start,end); // Set these to null in order to mark them as being candidates for lazy initialization. // If this is not done, they will have non-null defaults. super.setReadName(null); super.setCigarString(null); super.setReadBases(null); super.setBaseQualities(null); // Do this after the above because setCigarString will clear it. GATKBin.setReadIndexingBin(this, -1); }
if ( isEmpty() ) { firstContigIndex = iterator.peek().getReferenceIndex(); firstAlignmentStart = iterator.peek().getAlignmentStart(); } else {
emptyRead.setReferenceIndex(read.getReferenceIndex()); emptyRead.setAlignmentStart(0); emptyRead.setMappingQuality(0);