CramCompressionRecord createRecord() { final SAMFileHeader fileHeader = new SAMFileHeader(); final SAMRecord record = new SAMRecord(fileHeader); final Sam2CramRecordFactory sam2CramRecordFactory = new Sam2CramRecordFactory(null, fileHeader, CramVersions.CRAM_v3); return sam2CramRecordFactory.createCramRecord(record); }
private List<ReadFeature> buildMatchOrMismatchReadFeatures(final String refBases, final String readBases, final String scores) { final SAMFileHeader header = new SAMFileHeader(); final CramCompressionRecord record = new CramCompressionRecord(); record.alignmentStart = 1; final List<ReadFeature> readFeatures = new ArrayList<>(); final int fromPosInRead = 0; final int alignmentStartOffset = 0; final int nofReadBases = 1; final Sam2CramRecordFactory sam2CramRecordFactory = new Sam2CramRecordFactory(refBases.getBytes(), header, CramVersions.CRAM_v3); sam2CramRecordFactory.addMismatchReadFeatures(record.alignmentStart, readFeatures, fromPosInRead, alignmentStartOffset, nofReadBases, readBases.getBytes(), SAMUtils.fastqToPhred(scores)); return readFeatures; } }
refId = samRecord.getReferenceIndex(); ref = getRefBasesOfFail(cramContext, refId); sam2CramRecordFactory.setRefBases(ref); final CramCompressionRecord cramRecord = sam2CramRecordFactory.createCramRecord(samRecord); cramRecord.index = ++index; cramRecord.alignmentStart = samRecord.getAlignmentStart(); if (sam2CramRecordFactory.getBaseCount() < 3 * sam2CramRecordFactory.getFeatureCount()) { log.warn("Abnormally high number of mismatches, possibly wrong reference.");
break; case S: addSoftClip(features, zeroBasedPositionInRead, cigarElementLength, bases); break; case I: addInsertion(features, zeroBasedPositionInRead, cigarElementLength, bases); break; case M: case X: case EQ: addMismatchReadFeatures(cramRecord.alignmentStart, features, zeroBasedPositionInRead, alignmentStartOffset, cigarElementLength, bases, qualityScore); break;
public CramContext(SAMFileHeader samFileHeader, ReferenceSource referenceSource, CramLossyOptions lossyOptions) { this.samFileHeader = samFileHeader; this.referenceSource = referenceSource; this.lossyOptions = lossyOptions; sam2cramFactory = new Sam2CramRecordFactory(null, samFileHeader, CramVersions.CRAM_v3); sam2cramFactory.captureAllTags = lossyOptions.isCaptureAllTags(); sam2cramFactory.captureTags.addAll(lossyOptions.getCaptureTags()); sam2cramFactory.ignoreTags.addAll(lossyOptions.getIgnoreTags()); containerFactory = new ContainerFactory(samFileHeader, RECORDS_PER_SLICE); } }
/** * A wrapper method to provide better diagnostics for ArrayIndexOutOfBoundsException. * * @param cramRecord CRAM record * @param samRecord SAM record * @return a list of read features created for the given {@link htsjdk.samtools.SAMRecord} */ private List<ReadFeature> checkedCreateVariations(final CramCompressionRecord cramRecord, final SAMRecord samRecord) { try { return createVariations(cramRecord, samRecord); } catch (final ArrayIndexOutOfBoundsException e) { log.error("Reference bases array length=" + refBases.length); log.error("Offensive CRAM record: " + cramRecord.toString()); log.error("Offensive SAM record: " + samRecord.getSAMString()); throw e; } }
cramRecord.readFeatures = checkedCreateVariations(cramRecord, record); } else cramRecord.readFeatures = Collections.emptyList();
break; case S: addSoftClip(features, zeroBasedPositionInRead, cigarElementLength, bases); break; case I: addInsertion(features, zeroBasedPositionInRead, cigarElementLength, bases); break; case M: case X: case EQ: addMismatchReadFeatures(cramRecord.alignmentStart, features, zeroBasedPositionInRead, alignmentStartOffset, cigarElementLength, bases, qualityScore); break;
/** * A wrapper method to provide better diagnostics for ArrayIndexOutOfBoundsException. * * @param cramRecord CRAM record * @param samRecord SAM record * @return a list of read features created for the given {@link htsjdk.samtools.SAMRecord} */ private List<ReadFeature> checkedCreateVariations(final CramCompressionRecord cramRecord, final SAMRecord samRecord) { try { return createVariations(cramRecord, samRecord); } catch (final ArrayIndexOutOfBoundsException e) { log.error("Reference bases array length=" + refBases.length); log.error("Offensive CRAM record: " + cramRecord.toString()); log.error("Offensive SAM record: " + samRecord.getSAMString()); throw e; } }
cramRecord.readFeatures = checkedCreateVariations(cramRecord, record); } else cramRecord.readFeatures = Collections.emptyList();
final Sam2CramRecordFactory sam2CramRecordFactory = new Sam2CramRecordFactory(refs, samFileHeader, cramVersion); sam2CramRecordFactory.preserveReadNames = preserveReadNames; sam2CramRecordFactory.captureAllTags = captureAllTags; if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && refSeqIndex != samRecord.getReferenceIndex()) { sam2CramRecordFactory.setRefBases(source.getReferenceBases(samFileHeader.getSequence(samRecord.getReferenceIndex()), true)); final CramCompressionRecord cramRecord = sam2CramRecordFactory.createCramRecord(samRecord); cramRecord.index = ++index; cramRecord.alignmentDelta = samRecord.getAlignmentStart() - prevAlStart; if (sam2CramRecordFactory.getBaseCount() < 3 * sam2CramRecordFactory.getFeatureCount()) log.warn("Abnormally high number of mismatches, possibly wrong reference.");
private byte[] compressScores (SAMRecord record, byte[] ref, QualityScorePreservation p) { ReferenceTracks tracks = new ReferenceTracks(0, record.getReferenceName(), ref); Sam2CramRecordFactory f = new Sam2CramRecordFactory(ref, record.getHeader(), CramVersions.CRAM_v3); CramCompressionRecord cramRecord = f.createCramRecord(record); p.addQualityScores(record, cramRecord, tracks); if (!cramRecord.isForcePreserveQualityScores()) { CramNormalizer.restoreQualityScores((byte) 30, Collections.singletonList(cramRecord)); } return cramRecord.qualityScores; } }
final Sam2CramRecordFactory sam2CramRecordFactory = new Sam2CramRecordFactory(refs, samFileHeader, cramVersion); sam2CramRecordFactory.preserveReadNames = preserveReadNames; sam2CramRecordFactory.captureAllTags = captureAllTags; if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && refSeqIndex != samRecord.getReferenceIndex()) { sam2CramRecordFactory.setRefBases(source.getReferenceBases(samFileHeader.getSequence(samRecord.getReferenceIndex()), true)); final CramCompressionRecord cramRecord = sam2CramRecordFactory.createCramRecord(samRecord); cramRecord.index = ++index; cramRecord.alignmentDelta = samRecord.getAlignmentStart() - prevAlStart; if (sam2CramRecordFactory.getBaseCount() < 3 * sam2CramRecordFactory.getFeatureCount()) log.warn("Abnormally high number of mismatches, possibly wrong reference.");
/** * This checks that all read bases returned in the record from {@link Sam2CramRecordFactory#createCramRecord(SAMRecord)} * are from the BAM read base set. */ @Test public void testReadBaseNormalization() { final SAMFileHeader header = new SAMFileHeader(); final SAMRecord record = new SAMRecord(header); record.setReadName("test"); record.setReadUnmappedFlag(true); record.setReadBases(SequenceUtil.getIUPACCodesString().getBytes()); record.setBaseQualities(SAMRecord.NULL_QUALS); final Sam2CramRecordFactory sam2CramRecordFactory = new Sam2CramRecordFactory(null, header, CramVersions.CRAM_v3); final CramCompressionRecord cramRecord = sam2CramRecordFactory.createCramRecord(record); Assert.assertNotEquals(cramRecord.readBases, record.getReadBases()); Assert.assertEquals(cramRecord.readBases, SequenceUtil.toBamReadBasesInPlace(record.getReadBases())); }
@Ignore("Broken test.") @Test public void test3() { String line1 = "98573 1107 20 1 60 100M = 999587 -415 CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAG ::A<<=D@BBC;C9=7DEEBHDEHHACEEBEEEDEE=EFFHEEFFFEHEF@HFBCEFEHFEHEHFEHDHHHFHHHEHHHHDFHHHHHGHHHHHHHHHHHH"; String line2 = "98738 1187 20 18 29 99M1S = 1000253 432 AGCGGGGATATATAAAGGCTCAAAATTACTTTTTATATGGACAACTCTCTACTGCTTTGAGATGACTGATACTCATATTGATGGAGCTTTATCAAGAAAT !\"#$%&'()*+-./0'''''''''''#'#'#'''''''#''''#'''''''''##''''#'#''#'''''#'''''''''##''''#''##''''''''?"; String seqName = "20"; List<String> lines = Arrays.asList(new String[] { line2, line1 }); byte[] ref = "CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAGGAGATTCCTCAAGAAAGG" .getBytes(); ReferenceTracks tracks = new ReferenceTracks(0, seqName, ref); QualityScorePreservation p = new QualityScorePreservation("R8-N40-M40-D40"); for (String line : lines) { SAMRecord record = buildSAMRecord(seqName, line); Sam2CramRecordFactory f = new Sam2CramRecordFactory(ref, record.getHeader(), CramVersions.CRAM_v3); CramCompressionRecord cramRecord = f.createCramRecord(record); p.addQualityScores(record, cramRecord, tracks); if (!cramRecord.isForcePreserveQualityScores()) { CramNormalizer.restoreQualityScores((byte) 30, Collections.singletonList(cramRecord)); } StringBuffer sb = new StringBuffer(); sb.append(record.getBaseQualityString()); sb.append("\n"); sb.append(SAMUtils.phredToFastq(cramRecord.qualityScores)); assertArrayEquals(sb.toString(), record.getBaseQualities(), cramRecord.qualityScores); } }
@Ignore("Broken test.") @Test public void test4() { String line2 = "98738 1187 20 18 29 99M1S = 1000253 432 AGCGGGGATATATAAAGGCTCAAAATTACTTTTTATATGGACAACTCTCTACTGCTTTGAGATGACTGATACTCATATTGATGGAGCTTTATCAAGAAAT !\"#$%&'()*+-./0'''''''''''#'#'#'''''''#''''#'''''''''##''''#'#''#'''''#'''''''''##''''#''##''''''''?"; String seqName = "20"; List<String> lines = Arrays.asList(new String[] { line2 }); byte[] ref = "CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAGGAGATTCCTCAAGAAAGG" .getBytes(); ReferenceTracks tracks = new ReferenceTracks(0, seqName, ref); QualityScorePreservation p = new QualityScorePreservation("R40X10-N40-U40"); for (int i = 0; i < ref.length; i++) tracks.addCoverage(i + 1, 66); for (String line : lines) { SAMRecord record = buildSAMRecord(seqName, line); Sam2CramRecordFactory f = new Sam2CramRecordFactory(ref, record.getHeader(), CramVersions.CRAM_v3); CramCompressionRecord cramRecord = f.createCramRecord(record); p.addQualityScores(record, cramRecord, tracks); if (!cramRecord.isForcePreserveQualityScores()) { CramNormalizer.restoreQualityScores((byte) 30, Collections.singletonList(cramRecord)); } StringBuffer sb = new StringBuffer(); sb.append(record.getBaseQualityString()); sb.append("\n"); sb.append(SAMUtils.phredToFastq(cramRecord.qualityScores)); assertArrayEquals(sb.toString(), record.getBaseQualities(), cramRecord.qualityScores); } }