/** * Trim cigar down to one that starts at start reference on the left and extends to end on the reference * * @param cigar a non-null Cigar to trim down * @param start Where should we start keeping bases on the reference? The first position is 0 * @param end Where should we stop keeping bases on the reference? The maximum value is cigar.getReferenceLength() * @return a new Cigar with reference length == start - end + 1 */ public static Cigar trimCigarByReference(final Cigar cigar, final int start, final int end) { if ( start < 0 ) throw new IllegalArgumentException("Start must be >= 0 but got " + start); if ( end < start ) throw new IllegalArgumentException("End " + end + " is < start start " + start); if ( end > cigar.getReferenceLength() ) throw new IllegalArgumentException("End is beyond the cigar's reference length " + end + " for cigar " + cigar ); final Cigar result = trimCigar(cigar, start, end, true); if ( result.getReferenceLength() != end - start + 1) throw new IllegalStateException("trimCigarByReference failure: start " + start + " end " + end + " for " + cigar + " resulted in cigar with wrong size " + result); return result; }
public int getReferenceLength(String cigarString) { // Use htsjdk class for now TextCigarCodec codec = new TextCigarCodec(); Cigar cigar = codec.decode(cigarString); return cigar.getReferenceLength(); }
private BreakendSummary endBreakend(SAMSequenceDictionary dict) { int endpos = pos + cigar.getReferenceLength() - 1; return new BreakendSummary(rnameToReferenceIndex(dict, rname), BreakendDirection.Forward, endpos); } private static int rnameToReferenceIndex(SAMSequenceDictionary dict, String rname) {
/** * @return 1-based inclusive rightmost position of the sequence remaining after clipping or 0 * if there is no position, e.g. for unmapped read. */ public int getAlignmentEnd() { if (getReadUnmappedFlag()) { return NO_ALIGNMENT_START; } else if (this.mAlignmentEnd == NO_ALIGNMENT_START) { this.mAlignmentEnd = mAlignmentStart + getCigar().getReferenceLength() - 1; } return this.mAlignmentEnd; }
/** * @return 1-based inclusive rightmost position of the sequence remaining after clipping or 0 * if there is no position, e.g. for unmapped read. */ public int getAlignmentEnd() { if (getReadUnmappedFlag()) { return NO_ALIGNMENT_START; } else if (this.mAlignmentEnd == NO_ALIGNMENT_START) { this.mAlignmentEnd = mAlignmentStart + getCigar().getReferenceLength() - 1; } return this.mAlignmentEnd; }
/** * @return 1-based inclusive rightmost position of the clipped sequence, or 0 read if unmapped. */ public int getAlignmentEnd() { if (getReadUnmappedFlag()) { return NO_ALIGNMENT_START; } else if (this.mAlignmentEnd == NO_ALIGNMENT_START) { this.mAlignmentEnd = mAlignmentStart + getCigar().getReferenceLength() - 1; } return this.mAlignmentEnd; }
protected Range<Long> rangeOf(long startOffset, Cigar cigar) { int refLength = (cigar == null || cigar.isEmpty()) ? 1 : cigar.getReferenceLength(); return Range.closedOpen(startOffset, startOffset + refLength); }
private static boolean isReadsOverlap(SAMRecord record, int start, int position, int mateAlignmentStart){ if (position >= mateAlignmentStart) { return start >= mateAlignmentStart && start <= (mateAlignmentStart + record.getCigar().getReferenceLength() - 1); } else { return start >= mateAlignmentStart && record.getMateAlignmentStart() <= record.getAlignmentEnd(); } }
@Test(enabled = true) public void testLeftAlignCigarSequentiallyAdjacentID() { final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT"; final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT"; final Cigar originalCigar = TextCigarCodec.decode("18M4I12M4D2M"); final Cigar result = CigarUtils.leftAlignCigarSequentially(originalCigar, ref.getBytes(), hap.getBytes(), 0, 0); logger.warn("Result is " + result); Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different"); } }
/** * Create a "Sentinel" cigar that will be placed in BAM file when the actual cigar has more than 0xffff operator, * which are not supported by the bam format. The actual cigar will be encoded and placed in the CG attribute. * @param cigar actual cigar to create sentinel cigar for * @return sentinel cigar xSyN with readLength (x) and referenceLength (y) matching the input cigar. */ public static Cigar makeSentinelCigar(final Cigar cigar) { // in BAM there are only 28 bits for a cigar operator, so this a protection against overflow. if (cigar.getReadLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) { throw new IllegalArgumentException( String.format( "Cannot encode (to BAM) a record with more than %d cigar operations and a read-length greater than %d.", BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); } if (cigar.getReferenceLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) { throw new IllegalArgumentException( String.format( "Cannot encode (to BAM) a record that has than %d cigar operations and spans more than %d bases on the reference.", BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); } return new Cigar(Arrays.asList( new CigarElement(cigar.getReadLength(), CigarOperator.S), new CigarElement(cigar.getReferenceLength(), CigarOperator.N))); }
/** * Create a "Sentinel" cigar that will be placed in BAM file when the actual cigar has more than 0xffff operator, * which are not supported by the bam format. The actual cigar will be encoded and placed in the CG attribute. * @param cigar actual cigar to create sentinel cigar for * @return sentinel cigar xSyN with readLength (x) and referenceLength (y) matching the input cigar. */ public static Cigar makeSentinelCigar(final Cigar cigar) { // in BAM there are only 28 bits for a cigar operator, so this a protection against overflow. if (cigar.getReadLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) { throw new IllegalArgumentException( String.format( "Cannot encode (to BAM) a record with more than %d cigar operations and a read-length greater than %d.", BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); } if (cigar.getReferenceLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) { throw new IllegalArgumentException( String.format( "Cannot encode (to BAM) a record that has than %d cigar operations and spans more than %d bases on the reference.", BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); } return new Cigar(Arrays.asList( new CigarElement(cigar.getReadLength(), CigarOperator.S), new CigarElement(cigar.getReferenceLength(), CigarOperator.N))); }
/** * Create a "Sentinel" cigar that will be placed in BAM file when the actual cigar has more than 0xffff operator, * which are not supported by the bam format. The actual cigar will be encoded and placed in the CG attribute. * @param cigar actual cigar to create sentinel cigar for * @return sentinel cigar xSyN with readLength (x) and referenceLength (y) matching the input cigar. */ public static Cigar makeSentinelCigar(final Cigar cigar) { // in BAM there are only 28 bits for a cigar operator, so this a protection against overflow. if (cigar.getReadLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) { throw new IllegalArgumentException( String.format( "Cannot encode (to BAM) a record with more than %d cigar operations and a read-length greater than %d.", BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); } if (cigar.getReferenceLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) { throw new IllegalArgumentException( String.format( "Cannot encode (to BAM) a record that has than %d cigar operations and spans more than %d bases on the reference.", BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); } return new Cigar(Arrays.asList( new CigarElement(cigar.getReadLength(), CigarOperator.S), new CigarElement(cigar.getReferenceLength(), CigarOperator.N))); }
@Test(dataProvider = "referenceLengthData") public void testGetReferenceLength(final String textCigar, final int referenceLength, final int paddedReferenceLenght) throws Exception{ final Cigar cigar = TextCigarCodec.decode(textCigar); Assert.assertEquals(cigar.getReferenceLength(), referenceLength); Assert.assertEquals(cigar.getPaddedReferenceLength(), paddedReferenceLenght); }
/** * Adjusts the start/end alignment bounds of the given alignment. Expanded alignments are converted * @param r * @param expandStartBy * @param expandEndBy * @return record passed in */ public static SAMRecord adjustAlignmentBounds(SAMRecord r, int expandStartBy, int expandEndBy) { Cigar newCigar = softClipToAligned(r.getCigar(), expandStartBy, 0); r.setAlignmentStart(r.getAlignmentStart() + r.getCigar().getReferenceLength() - newCigar.getReferenceLength()); newCigar = softClipToAligned(newCigar, 0, expandEndBy); r.setCigar(newCigar); return r; } private static Cigar softClipToAligned(Cigar cigar, int startunclip, int endunclip) {
/** * This method uses the MateCigar value as determined from the attribute MC. It must be non-null. * * @param rec the SAM record * @return 1-based inclusive rightmost position of the clipped mate sequence, or 0 read if unmapped. */ public static int getMateAlignmentEnd(final SAMRecord rec) { if (rec.getMateUnmappedFlag()) { throw new RuntimeException("getMateAlignmentEnd called on an unmapped mate: " + rec); } final Cigar mateCigar = SAMUtils.getMateCigar(rec); if (mateCigar == null) { throw new SAMException("Mate CIGAR (Tag MC) not found:" + rec); } return CoordMath.getEnd(rec.getMateAlignmentStart(), mateCigar.getReferenceLength()); }
/** * This method uses the MateCigar value as determined from the attribute MC. It must be non-null. * @param rec the SAM record * @return 1-based inclusive rightmost position of the clipped mate sequence, or 0 read if unmapped. */ public static int getMateAlignmentEnd(final SAMRecord rec) { if (rec.getMateUnmappedFlag()) { throw new RuntimeException("getMateAlignmentEnd called on an unmapped mate."); } final Cigar mateCigar = SAMUtils.getMateCigar(rec); if (mateCigar == null) { throw new SAMException("Mate CIGAR (Tag MC) not found."); } return CoordMath.getEnd(rec.getMateAlignmentStart(), mateCigar.getReferenceLength()); }
/** * This method uses the MateCigar value as determined from the attribute MC. It must be non-null. * * @param rec the SAM record * @return 1-based inclusive rightmost position of the clipped mate sequence, or 0 read if unmapped. */ public static int getMateAlignmentEnd(final SAMRecord rec) { if (rec.getMateUnmappedFlag()) { throw new RuntimeException("getMateAlignmentEnd called on an unmapped mate: " + rec); } final Cigar mateCigar = SAMUtils.getMateCigar(rec); if (mateCigar == null) { throw new SAMException("Mate CIGAR (Tag MC) not found:" + rec); } return CoordMath.getEnd(rec.getMateAlignmentStart(), mateCigar.getReferenceLength()); }
/** * This method uses the MateCigar value as determined from the attribute MC. It must be non-null. * * @param rec the SAM record * @return 1-based inclusive rightmost position of the clipped mate sequence, or 0 read if unmapped. */ public static int getMateAlignmentEnd(final SAMRecord rec) { if (rec.getMateUnmappedFlag()) { throw new RuntimeException("getMateAlignmentEnd called on an unmapped mate: " + rec); } final Cigar mateCigar = SAMUtils.getMateCigar(rec); if (mateCigar == null) { throw new SAMException("Mate CIGAR (Tag MC) not found:" + rec); } return CoordMath.getEnd(rec.getMateAlignmentStart(), mateCigar.getReferenceLength()); }
@Test(dataProvider="clipData") public void basicTest(final String testName, final int start, final String inputCigar, final boolean negativeStrand, final int clipPosition, final String expectedCigar, final int expectedAdjustedStart) throws IOException { List<CigarElement> cigar = TextCigarCodec.decode(inputCigar).getCigarElements(); if (negativeStrand){ List<CigarElement> copiedList = new ArrayList<CigarElement>(cigar); Collections.reverse(copiedList); cigar = copiedList; } List<CigarElement> result = CigarUtil.softClipEndOfRead(clipPosition, cigar); Cigar newCigar = new Cigar(result); Cigar oldCigar = new Cigar(cigar); if (negativeStrand){ Collections.reverse(result); newCigar = new Cigar(result); int oldLength = oldCigar.getReferenceLength(); int newLength = newCigar.getReferenceLength(); int sizeChange = oldLength - newLength; //Assert.assertEquals(sizeChange, numClippedBases + adjustment, testName + " sizeChange == numClippedBases"); Assert.assertEquals(start + sizeChange, expectedAdjustedStart, sizeChange + " " + testName); Assert.assertTrue(sizeChange >= 0, "sizeChange >= 0. " + sizeChange); } Assert.assertEquals (TextCigarCodec.encode(newCigar), expectedCigar, testName); Assert.assertEquals(newCigar.getReadLength(), oldCigar.getReadLength()); Assert.assertNull(newCigar.isValid(testName, -1)); }
public static SimpleBEDFeature getSplitReadDeletion(SAMRecord r, ChimericAlignment ca, double minLengthPortion, int minMapq, int minIndelSize) { if (r.getMappingQuality() < minMapq) return null; if (ca.mapq < minMapq) return null; if (!ca.rname.equals(r.getReferenceName())) return null; if (ca.isNegativeStrand != r.getReadNegativeStrandFlag()) return null; if (r.getCigar().getReferenceLength() < r.getReadLength() * minLengthPortion) return null; if (ca.cigar.getReferenceLength() < r.getReadLength() * minLengthPortion) return null; int caStartClipLength = CigarUtil.getStartClipLength(ca.cigar.getCigarElements()); int rStartClipLength = CigarUtil.getStartClipLength(r.getCigar().getCigarElements()); if (r.getAlignmentEnd() < ca.pos && caStartClipLength > rStartClipLength) { if (ca.pos - r.getAlignmentEnd() >= minIndelSize) { return new SimpleBEDFeature(r.getAlignmentEnd(), ca.pos, r.getReferenceName()); } } int endpos = ca.pos + ca.cigar.getReferenceLength() - 1; if (endpos < r.getAlignmentStart() && caStartClipLength < rStartClipLength) { if (r.getAlignmentStart() - endpos >= minIndelSize) { return new SimpleBEDFeature(endpos, r.getAlignmentStart(), r.getReferenceName()); } } return null; } public static void main(String[] argv) {