/**
 * Transfers quality scores from a SAM record onto its CRAM counterpart.
 *
 * <p>If the SAM record carries {@link SAMRecord#NULL_QUALS}, the CRAM record receives the same
 * marker, its "force preserve quality scores" flag is cleared and nothing else happens.
 * Otherwise a score array of the read length is pre-filled with {@code -1} and handed to
 * {@code addQS} once per entry of {@code policyList}. Unless the CRAM record ends up flagged
 * to preserve all quality scores, every position whose score is greater than {@code -1} is
 * added as a {@link BaseQualityScore} read feature (1-based position) and the read features
 * are re-sorted with {@code readFeaturePositionComparator}.
 *
 * @param samRecord       source of read length and base qualities
 * @param cramRecord      record whose {@code qualityScores} and {@code readFeatures} are updated
 * @param referenceTracks passed through unchanged to {@code addQS}
 */
public void addQualityScores(final SAMRecord samRecord, final CramCompressionRecord cramRecord, final ReferenceTracks referenceTracks) {
    if (samRecord.getBaseQualities() == SAMRecord.NULL_QUALS) {
        cramRecord.qualityScores = SAMRecord.NULL_QUALS;
        cramRecord.setForcePreserveQualityScores(false);
        return;
    }

    // -1 marks a position for which no score has been selected.
    final byte[] selected = new byte[samRecord.getReadLength()];
    Arrays.fill(selected, (byte) -1);

    for (final PreservationPolicy policy : policyList) {
        addQS(samRecord, cramRecord, selected, referenceTracks, policy);
    }

    if (cramRecord.isForcePreserveQualityScores()) {
        // The whole array is kept on the record; no per-base read features are created.
        cramRecord.qualityScores = selected;
        return;
    }

    for (int index = 0; index < selected.length; index++) {
        final byte score = selected[index];
        if (score < 0) {
            continue;
        }
        if (cramRecord.readFeatures == null) {
            cramRecord.readFeatures = new LinkedList<ReadFeature>();
        }
        // Read feature positions are 1-based.
        cramRecord.readFeatures.add(new BaseQualityScore(index + 1, score));
    }

    if (cramRecord.readFeatures != null) {
        Collections.sort(cramRecord.readFeatures, readFeaturePositionComparator);
    }

    cramRecord.qualityScores = selected;
}
/**
 * Builds a small mapped test record: three bases ("AAA") with three quality scores ("!!!"),
 * placed on sequence 0 at alignment start {@code i + 1}, named after {@code i}, flagged as
 * last segment and not unmapped, with an empty read feature list.
 *
 * @param i index used for both the read name and the alignment start offset
 * @return the newly created record
 */
private static CramCompressionRecord createMappedRecord(int i) {
    final CramCompressionRecord mapped = new CramCompressionRecord();

    // Identity and placement.
    mapped.readName = String.valueOf(i);
    mapped.sequenceId = 0;
    mapped.alignmentStart = i + 1;

    // Read content.
    mapped.readBases = "AAA".getBytes();
    mapped.qualityScores = "!!!".getBytes();
    mapped.readLength = 3;
    mapped.readFeatures = Collections.emptyList();

    // Flags.
    mapped.setLastSegment(true);
    mapped.setSegmentUnmapped(false);

    return mapped;
}
/**
 * Computes the template length for a pair of CRAM records, following the semantics of
 * {@link htsjdk.samtools.SamPairUtil#computeInsertSize(SAMRecord, SAMRecord) computeInsertSize}
 * but working on CRAM native records rather than SAMRecord objects.
 *
 * <p>The result is 0 when either record is unmapped or when the two records have different
 * sequence ids. Otherwise it is the signed distance between the 5' positions of the two
 * records (alignment end for a negative-strand record, alignment start otherwise), extended
 * by one away from zero.
 *
 * @param firstEnd  first mate of the pair
 * @param secondEnd second mate of the pair
 * @return template length
 */
public static int computeInsertSize(final CramCompressionRecord firstEnd, final CramCompressionRecord secondEnd) {
    if (firstEnd.isSegmentUnmapped()
            || secondEnd.isSegmentUnmapped()
            || firstEnd.sequenceId != secondEnd.sequenceId) {
        return 0;
    }

    final int first5Prime;
    if (firstEnd.isNegativeStrand()) {
        first5Prime = firstEnd.getAlignmentEnd();
    } else {
        first5Prime = firstEnd.alignmentStart;
    }

    final int second5Prime;
    if (secondEnd.isNegativeStrand()) {
        second5Prime = secondEnd.getAlignmentEnd();
    } else {
        second5Prime = secondEnd.alignmentStart;
    }

    // The span is inclusive of both 5' positions, hence the extra one in the sign's direction.
    final int distance = second5Prime - first5Prime;
    return second5Prime < first5Prime ? distance - 1 : distance + 1;
}
}
/**
 * Copies mate information from {@code next} into {@code r}: alignment start, unmapped flag,
 * strand flag and sequence id. When the mate's sequence id equals
 * {@link SAMRecord#NO_ALIGNMENT_REFERENCE_INDEX}, the mate alignment start is set to
 * {@link SAMRecord#NO_ALIGNMENT_START} instead of the mate's own alignment start.
 *
 * @param r    record receiving the mate fields
 * @param next record that acts as the mate
 */
private static void setNextMate(CramCompressionRecord r, CramCompressionRecord next) {
    final boolean mateHasNoReference = next.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;

    r.mateAlignmentStart = mateHasNoReference
            ? SAMRecord.NO_ALIGNMENT_START
            : next.alignmentStart;
    r.setMateUnmapped(next.isSegmentUnmapped());
    r.setMateNegativeStrand(next.isNegativeStrand());
    r.mateSequenceID = next.sequenceId;
}
/**
 * Compares this record with another object. Two records are equal when the other object is a
 * {@code CramCompressionRecord} and alignment start, strand, vendor-filtered, unmapped, read
 * length, last/first segment, distance to next fragment, mapping quality, read features,
 * read bases, quality scores, flags and read name all match.
 *
 * @param obj object to compare against
 * @return {@code true} if all compared properties match, {@code false} otherwise
 */
@SuppressWarnings("SimplifiableIfStatement")
@Override
public boolean equals(final Object obj) {
    if (!(obj instanceof CramCompressionRecord)) {
        return false;
    }
    final CramCompressionRecord other = (CramCompressionRecord) obj;

    // Cheap scalar and flag comparisons first.
    final boolean scalarsMatch = alignmentStart == other.alignmentStart
            && isNegativeStrand() == other.isNegativeStrand()
            && isVendorFiltered() == other.isVendorFiltered()
            && isSegmentUnmapped() == other.isSegmentUnmapped()
            && readLength == other.readLength
            && isLastSegment() == other.isLastSegment()
            && recordsToNextFragment == other.recordsToNextFragment
            && isFirstSegment() == other.isFirstSegment()
            && mappingQuality == other.mappingQuality;
    if (!scalarsMatch) {
        return false;
    }

    // Then the collection- and array-valued content.
    return deepEquals(readFeatures, other.readFeatures)
            && Arrays.equals(readBases, other.readBases)
            && Arrays.equals(qualityScores, other.qualityScores)
            && areEqual(flags, other.flags)
            && areEqual(readName, other.readName);
}
final CramCompressionRecord cramRecord = new CramCompressionRecord(); if (record.getReadPairedFlag()) { cramRecord.mateAlignmentStart = record.getMateAlignmentStart(); cramRecord.setMateUnmapped(record.getMateUnmappedFlag()); cramRecord.setMateNegativeStrand(record.getMateNegativeStrandFlag()); cramRecord.mateSequenceID = record.getMateReferenceIndex(); } else cramRecord.mateSequenceID = -1; cramRecord.setMultiFragment(record.getReadPairedFlag()); cramRecord.setProperPair(record.getReadPairedFlag() && record.getProperPairFlag()); cramRecord.setSegmentUnmapped(record.getReadUnmappedFlag()); cramRecord.setNegativeStrand(record.getReadNegativeStrandFlag()); cramRecord.setFirstSegment(record.getReadPairedFlag() && record.getFirstOfPairFlag()); cramRecord.setLastSegment(record.getReadPairedFlag() && record.getSecondOfPairFlag()); cramRecord.setSecondaryAlignment(record.isSecondaryAlignment()); cramRecord.setVendorFiltered(record.getReadFailsVendorQualityCheckFlag()); cramRecord.setDuplicate(record.getDuplicateReadFlag()); cramRecord.setSupplementary(record.getSupplementaryAlignmentFlag()); cramRecord.setDuplicate(record.getDuplicateReadFlag()); if (!record.getReadPairedFlag()) cramRecord.setLastSegment(false); else { if (record.getFirstOfPairFlag()) cramRecord.setLastSegment(false); else if (record.getSecondOfPairFlag()) cramRecord.setLastSegment(true); cramRecord.qualityScores = record.getBaseQualities(); if (version.compatibleWith(CramVersions.CRAM_v3)) cramRecord.setUnknownBases(record.getReadBases() == SAMRecord.NULL_SEQUENCE);
/**
 * Copies the boolean flag properties of a CRAM record onto a SAM record.
 *
 * @param cramRecord record the flags are read from
 * @param samRecord  record the flags are written to
 */
private static void copyFlags(final CramCompressionRecord cramRecord, final SAMRecord samRecord) {
    // Pairing-related flags.
    final boolean multiFragment = cramRecord.isMultiFragment();
    final boolean properPair = cramRecord.isProperPair();
    samRecord.setReadPairedFlag(multiFragment);
    samRecord.setProperPairFlag(properPair);

    // Alignment state of the read itself.
    final boolean unmapped = cramRecord.isSegmentUnmapped();
    final boolean negativeStrand = cramRecord.isNegativeStrand();
    samRecord.setReadUnmappedFlag(unmapped);
    samRecord.setReadNegativeStrandFlag(negativeStrand);

    // Position of the read within its template.
    final boolean firstSegment = cramRecord.isFirstSegment();
    final boolean lastSegment = cramRecord.isLastSegment();
    samRecord.setFirstOfPairFlag(firstSegment);
    samRecord.setSecondOfPairFlag(lastSegment);

    // Remaining per-read flags.
    final boolean secondary = cramRecord.isSecondaryAlignment();
    samRecord.setSecondaryAlignment(secondary);
    final boolean vendorFiltered = cramRecord.isVendorFiltered();
    samRecord.setReadFailsVendorQualityCheckFlag(vendorFiltered);
    final boolean duplicate = cramRecord.isDuplicate();
    samRecord.setDuplicateReadFlag(duplicate);
    final boolean supplementary = cramRecord.isSupplementary();
    samRecord.setSupplementaryAlignmentFlag(supplementary);
}
Map<String, CramCompressionRecord> secondaryMateMap = new TreeMap<String, CramCompressionRecord>(); for (CramCompressionRecord r : cramRecords) { if (!r.isMultiFragment()) { r.setDetached(true); r.setHasMateDownStream(false); r.recordsToNextFragment = -1; r.next = null; } else { String name = r.readName; Map<String, CramCompressionRecord> mateMap = r.isSecondaryAlignment() ? secondaryMateMap : primaryMateMap; CramCompressionRecord mate = mateMap.get(name); prev.next = r; r.previous = prev; r.previous.setHasMateDownStream(true); r.setHasMateDownStream(false); r.setDetached(false); r.previous.setDetached(false); if ((r.isFirstSegment() && last.isLastSegment()) || (last.isFirstSegment() && r.isLastSegment())) { || (r.isMateNegativeStrand() != last.isNegativeStrand()) || (last.isMateNegativeStrand() != r.isNegativeStrand()) || (r.mateAlignmentStart != last.alignmentStart) || (last.mateAlignmentStart != r.alignmentStart)) { detach(r);
compBitFlagsC.writeData(r.getCompressionFlags()); if (refId == Slice.MULTI_REFERENCE) { refIdCodec.writeData(r.sequenceId); if (r.isDetached()) { mateBitFlagsCodec.writeData(r.getMateFlags()); if (!captureReadNames) { readNameC.writeData(r.readName.getBytes(charset)); nextFragmentAlignmentStart.writeData(r.mateAlignmentStart); templateSize.writeData(r.templateSize); } else if (r.isHasMateDownStream()) { distanceC.writeData(r.recordsToNextFragment); if (!r.isSegmentUnmapped()) { if (r.isForcePreserveQualityScores()) { qualityScoreArrayCodec.writeData(r.qualityScores); if (!r.isUnknownBases()) { for (final byte b : r.readBases) { baseCodec.writeData(b); if (r.isForcePreserveQualityScores()) { qualityScoreArrayCodec.writeData(r.qualityScores);
else if (cramRecord.qualityScores != SAMRecord.NULL_QUALS) cramRecord.setForcePreserveQualityScores(true); final Map<String, CramCompressionRecord> secondaryMateMap = new TreeMap<String, CramCompressionRecord>(); for (final CramCompressionRecord r : cramRecords) { if (!r.isMultiFragment()) { r.setDetached(true); r.setHasMateDownStream(false); r.recordsToNextFragment = -1; r.next = null; } else { final String name = r.readName; final Map<String, CramCompressionRecord> mateMap = r.isSecondaryAlignment() ? secondaryMateMap : primaryMateMap; final CramCompressionRecord mate = mateMap.get(name); if (mate == null) { prev.next = r; r.previous = prev; r.previous.setHasMateDownStream(true); r.setHasMateDownStream(false); r.setDetached(false); r.previous.setDetached(false); if (cramRecord.isFirstSegment() && last.isLastSegment()) { cramRecord.setDetached(true); cramRecord.setHasMateDownStream(false);
if (cramRecord.isDetached()) { cramRecord.mateFlags = mateBitFlagCodec.readData(); if (!captureReadNames) { cramRecord.mateAlignmentStart = mateAlignmentStartCodec.readData(); cramRecord.templateSize = insertSizeCodec.readData(); } else if (cramRecord.isHasMateDownStream()) { cramRecord.recordsToNextFragment = distanceToNextFragmentCodec.readData(); if (!cramRecord.isSegmentUnmapped()) { if (cramRecord.isForcePreserveQualityScores()) { cramRecord.qualityScores = qualityScoreArrayCodec.readDataArray(cramRecord.readLength); if (cramRecord.isUnknownBases()) { cramRecord.readBases = SAMRecord.NULL_SEQUENCE; cramRecord.qualityScores = SAMRecord.NULL_QUALS; if (cramRecord.isForcePreserveQualityScores()) { cramRecord.qualityScores = qualityScoreArrayCodec.readDataArray(cramRecord.readLength); if (prevRecord != null) { System.err.printf("Failed at record %d. Here is the previously read record: %s\n", recordCounter, prevRecord.toString()); if (prevRecord != null) { System.err.printf("Failed at record %d. Here is the previously read record: %s\n", recordCounter, prevRecord.toString());
if (!record.isMultiFragment() || record.isDetached()) { record.recordsToNextFragment = -1; continue; if (record.isHasMateDownStream()) { final CramCompressionRecord downMate = records.get(record.index + record.recordsToNextFragment - startCounter); if (record.isSegmentUnmapped()) continue; if (record.isUnknownBases()) { record.readBases = SAMRecord.NULL_SEQUENCE; } else
public static int computeInsertSize(CramCompressionRecord firstEnd, CramCompressionRecord secondEnd) { if (firstEnd.isSegmentUnmapped() || secondEnd.isSegmentUnmapped()) { return 0; final int right = Math.max(Math.max(firstEnd.alignmentStart, firstEnd.getAlignmentEnd()), Math.max(secondEnd.alignmentStart, secondEnd.getAlignmentEnd())); final int left = Math.min(Math.min(firstEnd.alignmentStart, firstEnd.getAlignmentEnd()), Math.min(secondEnd.alignmentStart, secondEnd.getAlignmentEnd())); final int tlen = right - left + 1; if (firstEnd.getAlignmentEnd() != right) firstEnd.templateSize = tlen; else if (firstEnd.isFirstSegment()) firstEnd.templateSize = tlen; else if (secondEnd.getAlignmentEnd() != right) secondEnd.templateSize = tlen; else if (secondEnd.isFirstSegment()) secondEnd.templateSize = tlen; else
/**
 * Reads one CRAM record through {@code super.read} and folds it into {@code spans}.
 *
 * <p>The running alignment start is advanced by the record's {@code alignmentDelta} when
 * {@code APDelta} is set, and otherwise replaced by the record's {@code alignmentStart}.
 * The span for the record's sequence id is then either extended with the current alignment
 * start and read length, or created if no entry exists for that sequence id yet.
 */
private void readCramRecord() {
    final CramCompressionRecord cramRecord = new CramCompressionRecord();
    super.read(cramRecord);

    if (!APDelta) {
        currentAlignmentStart = cramRecord.alignmentStart;
    } else {
        currentAlignmentStart += cramRecord.alignmentDelta;
    }

    if (spans.containsKey(cramRecord.sequenceId)) {
        spans.get(cramRecord.sequenceId).addSingle(currentAlignmentStart, cramRecord.readLength);
    } else {
        spans.put(cramRecord.sequenceId, new AlignmentSpan(currentAlignmentStart, cramRecord.readLength));
    }
}
}
@Test public void test_getAlignmentEnd() { CramCompressionRecord r = new CramCompressionRecord(); r.alignmentStart = 1; r.setSegmentUnmapped(true); Assert.assertEquals(r.getAlignmentEnd(), SAMRecord.NO_ALIGNMENT_START); r = new CramCompressionRecord(); int readLength = 100; r.alignmentStart = 1; r.readLength = readLength; r.setSegmentUnmapped(false); Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1); r = new CramCompressionRecord(); r.alignmentStart = 1; r.readLength = readLength; r.setSegmentUnmapped(false); r.readFeatures = new ArrayList<>(); String softClip = "AAA"; r.readFeatures.add(new SoftClip(1, softClip.getBytes())); Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 - softClip.length()); r = new CramCompressionRecord(); r.alignmentStart = 1; r.readLength = readLength; r.setSegmentUnmapped(false); r.readFeatures = new ArrayList<>(); int deletionLength = 5; r.readFeatures.add(new Deletion(1, deletionLength));
/**
 * Builds a mapped, single-fragment test record of five bases ("AAAAA") on the given
 * sequence and start position. The same byte array instance is used for both the read bases
 * and the quality scores, and the read name is the decimal form of {@code recordIndex}.
 *
 * @param recordIndex index used as the read name
 * @param seqId       sequence id to assign
 * @param start       alignment start to assign
 * @return the newly created record
 */
private static CramCompressionRecord createRecord(int recordIndex, int seqId, int start) {
    final byte[] sharedBytes = "AAAAA".getBytes();

    final CramCompressionRecord result = new CramCompressionRecord();
    result.setSegmentUnmapped(false);
    result.setMultiFragment(false);

    result.sequenceId = seqId;
    result.alignmentStart = start;

    // Bases and quality scores intentionally point at the same array.
    result.qualityScores = sharedBytes;
    result.readBases = sharedBytes;

    result.readName = String.valueOf(recordIndex);
    result.readLength = sharedBytes.length;
    result.readFeatures = Collections.emptyList();
    return result;
}

@Test
private static byte[] restoreQualityScores(final byte defaultQualityScore, final CramCompressionRecord record) { if (!record.isForcePreserveQualityScores()) { boolean star = true; final byte[] scores = new byte[record.readLength];
if (!record.isSegmentUnmapped() || record.readBases == null) { int end = record.getAlignmentEnd(); if (record.alignmentStart > SAMRecord.NO_ALIGNMENT_START && end >= record.alignmentStart) addSpan(record.sequenceId, record.alignmentStart, end - record.alignmentStart + 1, 1, map); continue; if (record.isUnknownBases()) { record.readBases = SAMRecord.NULL_SEQUENCE; } else { if (!record.isSegmentUnmapped() || record.readBases == null) record.readBases = restoreReadBases(record, region.array, (int) region.alignmentStart - 1, substitutionMatrix);
if (cramRecord.isSegmentUnmapped()) samRecord.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); else .setMateAlignmentStart(cramRecord.mateAlignmentStart > 0 ? cramRecord.mateAlignmentStart : SAMRecord .NO_ALIGNMENT_START); samRecord.setMateNegativeStrandFlag(cramRecord.isMateNegativeStrand()); samRecord.setMateUnmappedFlag(cramRecord.isMateUnmapped()); } else { samRecord
if (!record.isMultiFragment() || record.isDetached()) { record.recordsToNextFragment = -1; continue; if (record.isHasMateDownStream()) { final CramCompressionRecord downMate = records .get(record.index + record.recordsToNextFragment - startCounter);