/** * Calculates the for the predefined NM tag from the SAM spec. To the result of * countMismatches() it adds 1 for each indel. * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, final int referenceOffset) { return calculateSamNmTag(read, referenceBases, referenceOffset, false); }
/** * Calculates the predefined NM tag from the SAM spec: (# of mismatches + # of indels) * For the purposes for calculating mismatches, we do not yet support IUPAC ambiguous codes * (see <code>readBaseMatchesRefBaseWithAmbiguity</code> method). * * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, final int referenceOffset) { return calculateSamNmTag(read, referenceBases, referenceOffset, false); }
/** * Calculates the predefined NM tag from the SAM spec: (# of mismatches + # of indels) * For the purposes for calculating mismatches, we do not yet support IUPAC ambiguous codes * (see <code>readBaseMatchesRefBaseWithAmbiguity</code> method). */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases) { return calculateSamNmTag(read, referenceBases, 0, false); }
/** * Calculates the predefined NM tag from the SAM spec: (# of mismatches + # of indels) * For the purposes for calculating mismatches, we do not yet support IUPAC ambiguous codes * (see <code>readBaseMatchesRefBaseWithAmbiguity</code> method). * * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, final int referenceOffset) { return calculateSamNmTag(read, referenceBases, referenceOffset, false); }
/** * Calculates the predefined NM tag from the SAM spec: (# of mismatches + # of indels) * For the purposes for calculating mismatches, we do not yet support IUPAC ambiguous codes * (see <code>readBaseMatchesRefBaseWithAmbiguity</code> method). */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases) { return calculateSamNmTag(read, referenceBases, 0, false); }
/** * Calculates the for the predefined NM tag from the SAM spec. To the result of * countMismatches() it adds 1 for each indel. */ public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases) { return calculateSamNmTag(read, referenceBases, 0, false); }
/** Calculates and sets the NM, MD, and and UQ tags from the record and the reference * * @param record the record to be fixed * @param refSeqWalker a ReferenceSequenceWalker that will be used to traverse the reference * @param isBisulfiteSequence a flag indicating whether the sequence came from bisulfite-sequencing which would imply a different * calculation of the NM tag. * * No return value, modifies the provided record. */ public static void fixNmMdAndUq(final SAMRecord record, final ReferenceSequenceFileWalker refSeqWalker, final boolean isBisulfiteSequence) { final byte[] referenceBases = refSeqWalker.get(record.getReferenceIndex()).getBases(); // only recalculate NM if it isn't bisulfite, since it needs to be treated specially below SequenceUtil.calculateMdAndNmTags(record, referenceBases, true, !isBisulfiteSequence); if (isBisulfiteSequence) { // recalculate the NM tag for bisulfite data record.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(record, referenceBases, 0, isBisulfiteSequence)); } fixUq(record, refSeqWalker, isBisulfiteSequence); }
read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex - 1)); if ( read.getAttribute(SAMTag.UQ.name()) != null ) read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, leftmostIndex-1));
/** Calculates and sets the NM, MD, and and UQ tags from the record and the reference * * @param record the record to be fixed * @param refSeqWalker a ReferenceSequenceWalker that will be used to traverse the reference * @param isBisulfiteSequence a flag indicating whether the sequence came from bisulfite-sequencing which would imply a different * calculation of the NM tag. * * No return value, modifies the provided record. */ public static void fixNmMdAndUq(final SAMRecord record, final ReferenceSequenceFileWalker refSeqWalker, final boolean isBisulfiteSequence) { final byte[] referenceBases = refSeqWalker.get(record.getReferenceIndex()).getBases(); // only recalculate NM if it isn't bisulfite, since it needs to be treated specially below SequenceUtil.calculateMdAndNmTags(record, referenceBases, true, !isBisulfiteSequence); if (isBisulfiteSequence) { // recalculate the NM tag for bisulfite data record.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(record, referenceBases, 0, isBisulfiteSequence)); } fixUq(record, refSeqWalker, isBisulfiteSequence); }
private void validateNmTag(final SAMRecord record, final long recordNumber) { if (!record.getReadUnmappedFlag()) { final Integer tagNucleotideDiffs = record.getIntegerAttribute(ReservedTagConstants.NM); if (tagNucleotideDiffs == null) { addError(new SAMValidationError( Type.MISSING_TAG_NM, "NM tag (nucleotide differences) is missing", record.getReadName(), recordNumber)); } else if (refFileWalker != null) { final ReferenceSequence refSequence = refFileWalker.get(record.getReferenceIndex()); final int actualNucleotideDiffs = SequenceUtil.calculateSamNmTag(record, refSequence.getBases(), 0, isBisulfiteSequenced()); if (!tagNucleotideDiffs.equals(actualNucleotideDiffs)) { addError(new SAMValidationError( Type.INVALID_TAG_NM, "NM tag (nucleotide differences) in file [" + tagNucleotideDiffs + "] does not match reality [" + actualNucleotideDiffs + "]", record.getReadName(), recordNumber)); } } } }
private void validateNmTag(final SAMRecord record, final long recordNumber) { if (!record.getReadUnmappedFlag()) { final Integer tagNucleotideDiffs = record.getIntegerAttribute(ReservedTagConstants.NM); if (tagNucleotideDiffs == null) { addError(new SAMValidationError( Type.MISSING_TAG_NM, "NM tag (nucleotide differences) is missing", record.getReadName(), recordNumber)); } else if (refFileWalker != null) { final ReferenceSequence refSequence = refFileWalker.get(record.getReferenceIndex()); final int actualNucleotideDiffs = SequenceUtil.calculateSamNmTag(record, refSequence.getBases(), 0, isBisulfiteSequenced()); if (!tagNucleotideDiffs.equals(actualNucleotideDiffs)) { addError(new SAMValidationError( Type.INVALID_TAG_NM, "NM tag (nucleotide differences) in file [" + tagNucleotideDiffs + "] does not match reality [" + actualNucleotideDiffs + "]", record.getReadName(), recordNumber)); } } } }
private void validateNmTag(final SAMRecord record, final long recordNumber) { if (!record.getReadUnmappedFlag()) { final Integer tagNucleotideDiffs = record.getIntegerAttribute(ReservedTagConstants.NM); if (tagNucleotideDiffs == null) { addError(new SAMValidationError( Type.MISSING_TAG_NM, "NM tag (nucleotide differences) is missing", record.getReadName(), recordNumber)); } else if (refFileWalker != null) { final ReferenceSequence refSequence = refFileWalker.get(record.getReferenceIndex()); final int actualNucleotideDiffs = SequenceUtil.calculateSamNmTag(record, refSequence.getBases(), 0, isBisulfiteSequenced()); if (!tagNucleotideDiffs.equals(actualNucleotideDiffs)) { addError(new SAMValidationError( Type.INVALID_TAG_NM, "NM tag (nucleotide differences) in file [" + tagNucleotideDiffs + "] does not match reality [" + actualNucleotideDiffs + "]", record.getReadName(), recordNumber)); } } } }
@Test public void testCalculateNmTag() { final File TEST_DIR = new File("src/test/resources/htsjdk/samtools/SequenceUtil"); final File referenceFile = new File(TEST_DIR, "reference_with_lower_and_uppercase.fasta"); final File samFile = new File(TEST_DIR, "upper_and_lowercase_read.sam"); SamReader reader = SamReaderFactory.makeDefault().open(samFile); ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile); reader.iterator().stream().forEach(r -> { Integer nm = SequenceUtil.calculateSamNmTag(r, ref.getSequence(r.getContig()).getBases()); String md = r.getStringAttribute(SAMTag.MD.name()); Assert.assertEquals(r.getIntegerAttribute(SAMTag.NM.name()), nm, "problem with NM in read \'" + r.getReadName() + "\':"); SequenceUtil.calculateMdAndNmTags(r, ref.getSequence(r.getContig()).getBases(), true, true); Assert.assertEquals(r.getIntegerAttribute(SAMTag.NM.name()), nm, "problem with NM in read \'" + r.getReadName() + "\':"); if (md != null) { Assert.assertEquals(r.getStringAttribute(SAMTag.MD.name()), md, "problem with MD in read \'" + r.getReadName() + "\':"); } }); }
public static SAMRecord ensureNmTag(ReferenceSequenceFile ref, SAMRecord record) { if (record == null) return record; if (record.getReadBases() == null) return record; if (record.getReadBases() == SAMRecord.NULL_SEQUENCE) return record; if (record.getIntegerAttribute(SAMTag.NM.name()) != null) return record; if (record.getReadUnmappedFlag()) return record; byte[] refSeq = ref .getSubsequenceAt(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()) .getBases(); final int actualNucleotideDiffs = SequenceUtil.calculateSamNmTag(record, refSeq, record.getAlignmentStart() - 1); record.setAttribute(SAMTag.NM.name(), actualNucleotideDiffs); return record; }