/**
 * Checks that {@code AlignmentUtils.removeTrailingDeletions} hands back a cigar equal to its input
 * exactly when the textual cigar does not end with a "D" element.
 *
 * @param cigar    the textual cigar that is decoded and then processed
 * @param expected value supplied by the data provider; this test does not consult it
 */
@Test(dataProvider = "StartsOrEndsWithInsertionOrDeletionData", enabled = true)
public void testRemoveTrailingDeletions(final String cigar, final boolean expected) {
    final Cigar decoded = TextCigarCodec.decode(cigar);
    final Cigar trimmed = AlignmentUtils.removeTrailingDeletions(decoded);

    // the result may differ from the input only when the text form ends in a deletion
    final boolean wasLeftUntouched = decoded.equals(trimmed);
    final boolean shouldBeUntouched = !cigar.endsWith("D");
    Assert.assertEquals(wasLeftUntouched, shouldBeUntouched);
}
/** * Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the * provided vertex is the sink) and the reference path. * * @param vertex the sink of the dangling chain * @param pruneFactor the prune factor to use in ignoring chain pieces * @return a SmithWaterman object which can be null if no proper alignment could be generated */ protected DanglingChainMergeHelper generateCigarAgainstDownwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) { final int minTailPathLength = Math.max(1, minDanglingBranchLength); // while heads can be 0, tails absolutely cannot // find the lowest common ancestor path between this vertex and the diverging master path if available final List<MultiDeBruijnVertex> altPath = findPathUpwardsToLowestCommonAncestor(vertex, pruneFactor); if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < minTailPathLength + 1 ) // add 1 to include the LCA return null; // now get the reference path from the LCA final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.downwards, Arrays.asList(incomingEdgeOf(altPath.get(1)))); // create the Smith-Waterman strings to use final byte[] refBases = getBasesForPath(refPath, false); final byte[] altBases = getBasesForPath(altPath, false); // run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting) final SmithWaterman alignment = new SWPairwiseAlignment(refBases, altBases, SWParameterSet.STANDARD_NGS, SWPairwiseAlignment.OVERHANG_STRATEGY.LEADING_INDEL); return new DanglingChainMergeHelper(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar())); }
/** * Generates the CIGAR string from the Smith-Waterman alignment of the dangling path (where the * provided vertex is the source) and the reference path. * * @param vertex the source of the dangling head * @param pruneFactor the prune factor to use in ignoring chain pieces * @return a SmithWaterman object which can be null if no proper alignment could be generated */ protected DanglingChainMergeHelper generateCigarAgainstUpwardsReferencePath(final MultiDeBruijnVertex vertex, final int pruneFactor, final int minDanglingBranchLength) { // find the highest common descendant path between vertex and the reference source if available final List<MultiDeBruijnVertex> altPath = findPathDownwardsToHighestCommonDescendantOfReference(vertex, pruneFactor); if ( altPath == null || isRefSink(altPath.get(0)) || altPath.size() < minDanglingBranchLength + 1 ) // add 1 to include the LCA return null; // now get the reference path from the LCA final List<MultiDeBruijnVertex> refPath = getReferencePath(altPath.get(0), TraversalDirection.upwards, Collections.<MultiSampleEdge>emptyList()); // create the Smith-Waterman strings to use final byte[] refBases = getBasesForPath(refPath, true); final byte[] altBases = getBasesForPath(altPath, true); // run Smith-Waterman to determine the best alignment (and remove trailing deletions since they aren't interesting) final SmithWaterman alignment = new SWPairwiseAlignment(refBases, altBases, SWParameterSet.STANDARD_NGS, SWPairwiseAlignment.OVERHANG_STRATEGY.LEADING_INDEL); return new DanglingChainMergeHelper(altPath, refPath, altBases, refBases, AlignmentUtils.removeTrailingDeletions(alignment.getCigar())); }