/**
 * Verifies that {@code setRefMD5} stores the same digest that
 * {@code SequenceUtil.calculateMD5} produces for the slice's reference span,
 * and that the slice subsequently validates against that reference.
 */
@Test
public void test_validateRef() {
    final byte[] ref = "AAAAA".getBytes();
    final int digestLength = Math.min(5, ref.length);
    final byte[] md5 = SequenceUtil.calculateMD5(ref, 0, digestLength);

    // A single-reference slice that covers all five bases (1-based start).
    final Slice slice = new Slice();
    slice.sequenceId = 0;
    slice.alignmentStart = 1;
    slice.alignmentSpan = 5;
    slice.setRefMD5(ref);

    Assert.assertEquals(slice.refMD5, md5);
    Assert.assertTrue(slice.validateRefMD5(ref));
}
/**
 * Checks this slice's stored reference MD5 against the supplied reference bases.
 *
 * <p>Slices whose sequence id is {@code SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX} are
 * always considered valid. If the digest over the full alignment span does not match,
 * the check is retried with a span one base shorter; a match on the retry is logged
 * as a warning and still reported as valid.
 *
 * @param ref the reference bases to validate against
 * @return {@code true} if the MD5 matches (fully or on the shortened span) or the
 *         slice has no alignment reference; {@code false} otherwise
 * @throws SAMException if this slice is a multi-reference slice
 */
public boolean validateRefMD5(final byte[] ref) {
    if (sequenceId == Slice.MULTI_REFERENCE) {
        throw new SAMException("Cannot verify a slice with multiple references on a single reference.");
    }
    if (sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
        return true;
    }

    alignmentBordersSanityCheck(ref);

    if (validateRefMD5(ref, alignmentStart, alignmentSpan, refMD5)) {
        return true;
    }

    // Full-span digest did not match: prepare an excerpt for the log message.
    final int shoulderLength = 10;
    final String excerpt = getBrief(alignmentStart, alignmentSpan, ref, shoulderLength);
    final int lastPosition = alignmentStart + alignmentSpan - 1;

    // Retry with the span shortened by one base before declaring a mismatch.
    final boolean matchesShortenedSpan = validateRefMD5(ref, alignmentStart, alignmentSpan - 1, refMD5);
    if (matchesShortenedSpan) {
        log.warn(String.format("Reference MD5 matches partially for slice %d:%d-%d, %s",
                sequenceId, alignmentStart, lastPosition, excerpt));
        return true;
    }

    log.error(String.format("Reference MD5 mismatch for slice %d:%d-%d, %s",
            sequenceId, alignmentStart, lastPosition, excerpt));
    return false;
}
Container buildContainer(final List<CramCompressionRecord> records, final SubstitutionMatrix substitutionMatrix) { // sets header APDelta final boolean coordinateSorted = samFileHeader.getSortOrder() == SAMFileHeader.SortOrder.coordinate; final CompressionHeader header = new CompressionHeaderFactory().build(records, substitutionMatrix, coordinateSorted); header.readNamesIncluded = preserveReadNames; final List<Slice> slices = new ArrayList<>(); final Container container = new Container(); container.header = header; container.nofRecords = records.size(); container.globalRecordCounter = globalRecordCounter; container.bases = 0; container.blockCount = 0; long lastGlobalRecordCounter = container.globalRecordCounter; for (int i = 0; i < records.size(); i += recordsPerSlice) { final List<CramCompressionRecord> sliceRecords = records.subList(i, Math.min(records.size(), i + recordsPerSlice)); final Slice slice = Slice.buildSlice(sliceRecords, header); slice.globalRecordCounter = lastGlobalRecordCounter; lastGlobalRecordCounter += slice.nofRecords; container.bases += slice.bases; slices.add(slice); } container.finalizeContainerState(slices.toArray(new Slice[0])); globalRecordCounter += records.size(); return container; }
/**
 * Creates a {@link CramRecordReader} for this slice.
 *
 * <p>The reader is constructed from the results of {@code getCoreBlockInputStream()}
 * and {@code getExternalBlockInputMap()}, together with this slice's sequence id.
 *
 * @param header the Cram Compression Header associated with this slice
 * @param validationStringency how strict to be when reading CRAM records
 * @return a new reader over this slice's data
 */
public CramRecordReader createCramRecordReader(
        final CompressionHeader header,
        final ValidationStringency validationStringency) {
    return new CramRecordReader(
            getCoreBlockInputStream(),
            getExternalBlockInputMap(),
            header,
            sequenceId,
            validationStringency);
}
/**
 * Verifies that a slice with no alignment reference validates successfully
 * against any reference argument: {@code null}, an empty array, or arbitrary bases.
 */
@Test
public void testUnmappedValidateRef() {
    final Slice unmapped = new Slice();
    unmapped.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
    unmapped.alignmentStart = SAMRecord.NO_ALIGNMENT_START;

    final byte[][] candidateRefs = {null, new byte[0], new byte[1024]};
    for (final byte[] candidate : candidateRefs) {
        Assert.assertTrue(unmapped.validateRefMD5(candidate));
    }
}
/**
 * Expects {@code finalizeContainerState} to throw a {@code CRAMException} when a
 * container is given one single-reference slice and one unmapped slice.
 */
@Test(expectedExceptions = CRAMException.class)
public static void singleAndUnmappedStateTest() {
    // Slice placed on reference 5.
    final Slice mappedSlice = new Slice();
    mappedSlice.sequenceId = 5;
    mappedSlice.alignmentStart = 10;
    mappedSlice.alignmentSpan = 15;

    // Slice with no alignment reference, start, or span.
    final Slice unmappedSlice = new Slice();
    unmappedSlice.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
    unmappedSlice.alignmentStart = Slice.NO_ALIGNMENT_START;
    unmappedSlice.alignmentSpan = Slice.NO_ALIGNMENT_SPAN;

    final Container container = new Container();
    container.finalizeContainerState(mappedSlice, unmappedSlice);
}
slice.containerOffset = container.offset; slice.index = sliceIndex++; if (slice.isMultiref()) { final ContainerParser parser = new ContainerParser(indexBuilder.bamHeader); final Map<Integer, AlignmentSpan> refSet = parser.getReferences(container, validationStringency); final Slice fakeSlice = new Slice(); slice.containerOffset = container.offset; slice.index = sliceIndex++;
if (!s.validateRefMD5(ref)) { log.error(String .format("Reference sequence MD5 mismatch for slice: seq id %d, start %d, span %d, expected MD5 %s",
final Slice slice = new Slice(); slice.nofRecords = records.size(); if (slice.isUnmapped() && record.isPlaced()) { slice.sequenceId = MULTI_REFERENCE; else if (slice.isMappedSingleRef()) { if (slice.isMappedSingleRef()) { slice.alignmentStart = minAlStart; slice.alignmentSpan = maxAlEnd - minAlStart + 1;
/**
 * Computes and stores this slice's reference MD5 from the supplied reference bases.
 *
 * <p>When the slice has a negative sequence id and an alignment start below 1, a
 * 16-byte all-zero digest is stored instead. Otherwise the digest is taken over the
 * region starting at {@code alignmentStart} (1-based), with the span clipped so that
 * it does not run past the end of {@code ref}.
 *
 * @param ref the reference bases to digest
 * @throws RuntimeException if the computed region extends beyond the reference
 */
public void setRefMD5(final byte[] ref) {
    alignmentBordersSanityCheck(ref);

    final boolean hasNoReferenceRegion = sequenceId < 0 && alignmentStart < 1;
    if (hasNoReferenceRegion) {
        // A freshly allocated byte array is already zero-filled.
        refMD5 = new byte[16];
        log.debug("Empty slice ref md5 is set.");
        return;
    }

    final int span = Math.min(alignmentSpan, ref.length - alignmentStart + 1);
    if (alignmentStart + span > ref.length + 1) {
        throw new RuntimeException("Invalid alignment boundaries.");
    }

    // alignmentStart is 1-based; the digest offset is 0-based.
    refMD5 = SequenceUtil.calculateMD5(ref, alignmentStart - 1, span);

    if (!log.isEnabled(Log.LogLevel.DEBUG)) {
        return;
    }

    // Short references are logged whole; longer ones via getBrief.
    final int shoulder = 10;
    final String excerpt;
    if (ref.length <= shoulder * 2) {
        excerpt = new String(ref);
    } else {
        excerpt = getBrief(alignmentStart, alignmentSpan, ref, shoulder);
    }

    final String md5Hex = String.format("%032x", new BigInteger(1, refMD5));
    log.debug(String.format("Slice md5: %s for %d:%d-%d, %s",
            md5Hex, sequenceId, alignmentStart, alignmentStart + span - 1, excerpt));
}
/** * Create index entries for a single container. * @param container the container to index */ public void processContainer(final Container container) { // TODO: this should be refactored and delegate to container/slice if (!container.isEOF()) { for (final Slice s: container.slices) { if (s.sequenceId == Slice.MULTI_REFERENCE) { final Map<Integer, AlignmentSpan> spans = s.getMultiRefAlignmentSpans(container.header, ValidationStringency.DEFAULT_STRINGENCY); this.entries.addAll(spans.entrySet().stream() .map(e -> new CRAIEntry(e.getKey(), e.getValue().getStart(), e.getValue().getSpan(), container.offset, container.landmarks[s.index], s.size)) .collect(Collectors.toList())); } else { entries.add(s.getCRAIEntry(container.offset)); } } } }
/**
 * Builds a map from reference sequence id to alignment span for a single slice.
 *
 * <ul>
 *   <li>A slice with no alignment reference maps that id to {@code AlignmentSpan.UNMAPPED_SPAN}.</li>
 *   <li>A multi-reference slice contributes the spans returned by {@code getMultiRefAlignmentSpans}.</li>
 *   <li>Any other slice contributes one span built from its own start, span and record count.</li>
 * </ul>
 *
 * @param slice the slice to inspect
 * @param header the compression header used when reading a multi-reference slice
 * @param validationStringency the stringency used when reading a multi-reference slice
 * @return a new map of reference id to alignment span
 * @throws IOException declared by this method's signature
 */
Map<Integer, AlignmentSpan> getReferences(final Slice slice,
                                          final CompressionHeader header,
                                          final ValidationStringency validationStringency) throws IOException {
    final Map<Integer, AlignmentSpan> spanMap = new HashMap<>();
    final int referenceId = slice.sequenceId;

    if (referenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
        spanMap.put(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, AlignmentSpan.UNMAPPED_SPAN);
    } else if (referenceId == Slice.MULTI_REFERENCE) {
        final Map<Integer, AlignmentSpan> multiRefSpans =
                slice.getMultiRefAlignmentSpans(header, validationStringency);
        addAllSpans(spanMap, multiRefSpans);
    } else {
        addSpan(slice.sequenceId, slice.alignmentStart, slice.alignmentSpan, slice.nofRecords, spanMap);
    }

    return spanMap;
}
final CramRecordReader reader = slice.createCramRecordReader(header, validationStringency);
/**
 * Expects {@code finalizeContainerState} to throw a {@code CRAMException} when a
 * container is given two single-reference slices with different sequence ids.
 */
@Test(expectedExceptions = CRAMException.class)
public static void differentReferencesStateTest() {
    // Slice on reference 5.
    final Slice firstSlice = new Slice();
    firstSlice.sequenceId = 5;
    firstSlice.alignmentStart = 10;
    firstSlice.alignmentSpan = 15;

    // Slice on a different reference (2).
    final Slice secondSlice = new Slice();
    secondSlice.sequenceId = 2;
    secondSlice.alignmentStart = 1;
    secondSlice.alignmentSpan = 10;

    final Container container = new Container();
    container.finalizeContainerState(firstSlice, secondSlice);
}
/**
 * Creates a {@link CramRecordReader} for this slice.
 *
 * <p>The reader is constructed from the results of {@code getCoreBlockInputStream()}
 * and {@code getExternalBlockInputMap()}, together with this slice's sequence id.
 *
 * @param header the Cram Compression Header associated with this slice
 * @param validationStringency how strict to be when reading CRAM records
 * @return a new reader over this slice's data
 */
public CramRecordReader createCramRecordReader(
        final CompressionHeader header,
        final ValidationStringency validationStringency) {
    return new CramRecordReader(
            getCoreBlockInputStream(),
            getExternalBlockInputMap(),
            header,
            sequenceId,
            validationStringency);
}
slice.containerOffset = container.offset; slice.index = sliceIndex++; if (slice.isMultiref()) { final ContainerParser parser = new ContainerParser(indexBuilder.bamHeader); final Map<Integer, AlignmentSpan> refSet = parser.getReferences(container, validationStringency); final Slice fakeSlice = new Slice(); slice.containerOffset = container.offset; slice.index = sliceIndex++;
continue; if (!slice.validateRefMD5(refs)) { final String msg = String.format( "Reference sequence MD5 mismatch for slice: sequence id %d, start %d, span %d, expected MD5 %s",
slice.setRefMD5(refs);
/**
 * Computes and stores this slice's reference MD5 from the supplied reference bases.
 *
 * <p>When the slice has a negative sequence id and an alignment start below 1, a
 * 16-byte all-zero digest is stored instead. Otherwise the digest is taken over the
 * region starting at {@code alignmentStart} (1-based), with the span clipped so that
 * it does not run past the end of {@code ref}.
 *
 * @param ref the reference bases to digest
 * @throws RuntimeException if the computed region extends beyond the reference
 */
public void setRefMD5(final byte[] ref) {
    alignmentBordersSanityCheck(ref);

    final boolean hasNoReferenceRegion = sequenceId < 0 && alignmentStart < 1;
    if (hasNoReferenceRegion) {
        // A freshly allocated byte array is already zero-filled.
        refMD5 = new byte[16];
        log.debug("Empty slice ref md5 is set.");
        return;
    }

    final int span = Math.min(alignmentSpan, ref.length - alignmentStart + 1);
    if (alignmentStart + span > ref.length + 1) {
        throw new RuntimeException("Invalid alignment boundaries.");
    }

    // alignmentStart is 1-based; the digest offset is 0-based.
    refMD5 = SequenceUtil.calculateMD5(ref, alignmentStart - 1, span);

    if (!log.isEnabled(Log.LogLevel.DEBUG)) {
        return;
    }

    // Short references are logged whole; longer ones via getBrief.
    final int shoulder = 10;
    final String excerpt;
    if (ref.length <= shoulder * 2) {
        excerpt = new String(ref);
    } else {
        excerpt = getBrief(alignmentStart, alignmentSpan, ref, shoulder);
    }

    final String md5Hex = String.format("%032x", new BigInteger(1, refMD5));
    log.debug(String.format("Slice md5: %s for %d:%d-%d, %s",
            md5Hex, sequenceId, alignmentStart, alignmentStart + span - 1, excerpt));
}