@Test public void test_updateSubstitutionCodes() { final CramCompressionRecord record = new CramCompressionRecord(); final Substitution s = new Substitution(); s.setPosition(1); final byte refBase = 'A'; final byte readBase = 'C'; s.setBase(readBase); s.setReferenceBase(refBase); record.readFeatures = new ArrayList<>(); record.readFeatures.add(s); record.readLength = 2; final List<CramCompressionRecord> records = new ArrayList<>(); records.add(record); final long[][] frequencies = new long[256][256]; frequencies[refBase][readBase] = 1; SubstitutionMatrix matrix = new SubstitutionMatrix(frequencies); Assert.assertTrue(s.getCode() == -1); CompressionHeaderFactory.updateSubstitutionCodes(records, matrix); Assert.assertFalse(s.getCode() == -1); Assert.assertEquals(s.getCode(), matrix.code(refBase, readBase)); }
/** * Given the records update the substitution matrix with actual substitution * codes. * * @param records * CRAM records * @param substitutionMatrix * the matrix to be updated */ static void updateSubstitutionCodes(final List<CramCompressionRecord> records, final SubstitutionMatrix substitutionMatrix) { for (final CramCompressionRecord record : records) { if (record.readFeatures != null) { for (final ReadFeature recordFeature : record.readFeatures) { if (recordFeature.getOperator() == Substitution.operator) { final Substitution substitution = ((Substitution) recordFeature); if (substitution.getCode() == Substitution.NO_CODE) { final byte refBase = substitution.getReferenceBase(); final byte base = substitution.getBase(); substitution.setCode(substitutionMatrix.code(refBase, base)); } } } } } }
/** * Build an array of substitution frequencies for the given CRAM records. * * @param records * CRAM records to scan * @return a 2D array of frequencies, see * {@link htsjdk.samtools.cram.structure.SubstitutionMatrix} */ static long[][] buildFrequencies(final List<CramCompressionRecord> records) { final long[][] frequencies = new long[BYTE_SPACE_SIZE][BYTE_SPACE_SIZE]; for (final CramCompressionRecord record : records) { if (record.readFeatures != null) { for (final ReadFeature readFeature : record.readFeatures) { if (readFeature.getOperator() == Substitution.operator) { final Substitution substitution = ((Substitution) readFeature); final byte refBase = substitution.getReferenceBase(); final byte base = substitution.getBase(); frequencies[refBase][base]++; } } } } return frequencies; }
/** * Test the outcome of a ACGTN mismatch. * The result should always be a {@link Substitution} read feature. */ @Test public void testAddMismatchReadFeaturesSingleSubstitution() { final List<ReadFeature> readFeatures = buildMatchOrMismatchReadFeatures("A", "C", "!"); Assert.assertEquals(1, readFeatures.size()); final ReadFeature rf = readFeatures.get(0); Assert.assertTrue(rf instanceof Substitution); final Substitution substitution = (Substitution) rf; Assert.assertEquals(1, substitution.getPosition()); Assert.assertEquals('C', substitution.getBase()); Assert.assertEquals('A', substitution.getReferenceBase()); }
@Test public void test_buildFrequencies() { final CramCompressionRecord record = new CramCompressionRecord(); final Substitution s = new Substitution(); s.setPosition(1); final byte refBase = 'A'; final byte readBase = 'C'; s.setBase(readBase); s.setReferenceBase(refBase); s.setCode((byte) 1); record.readFeatures = new ArrayList<>(); record.readFeatures.add(s); record.readLength = 2; final List<CramCompressionRecord> records = new ArrayList<>(); records.add(record); final long[][] frequencies = CompressionHeaderFactory.buildFrequencies(records); for (int i = 0; i < frequencies.length; i++) { for (int j = 0; j < frequencies[i].length; j++) { if (i != refBase && j != readBase) { Assert.assertEquals(frequencies[i][j], 0); } } } Assert.assertEquals(frequencies[refBase][readBase], 1); }
final byte base = substitutionMatrix.base(refBase, substitution.getCode()); substitution.setBase(base); substitution.setReferenceBase(refBase); bases[posInRead++ - 1] = base; posInSeq++;
case Substitution.operator: final Substitution sv = (Substitution) f; if (sv.getCode() < 0) baseSubstitutionCodeCodec.writeData(substitutionMatrix.code(sv.getReferenceBase(), sv.getBase())); else baseSubstitutionCodeCodec.writeData(sv.getCode());
break; case Substitution.operator: final Substitution substitution = new Substitution(); substitution.setPosition(pos); final byte code = baseSubstitutionCodec.readData(); substitution.setCode(code); readFeatures.add(substitution); break;
final boolean isSubstitution = SequenceUtil.isUpperACGTN(readBase) && SequenceUtil.isUpperACGTN(refBase); if (isSubstitution) { features.add(new Substitution(oneBasedPositionInRead, readBase, refBase)); } else { final byte score = qualityScore[i + fromPosInRead];
final byte base = substitutionMatrix.base(refBase, substitution.getCode()); substitution.setBase(base); substitution.setReferenceBase(refBase); bases[posInRead++ - 1] = base; posInSeq++;
case Substitution.operator: final Substitution sv = (Substitution) f; if (sv.getCode() < 0) baseSubstitutionCodeCodec.writeData(substitutionMatrix.code(sv.getReferenceBase(), sv.getBase())); else baseSubstitutionCodeCodec.writeData(sv.getCode());
break; case Substitution.operator: final Substitution substitution = new Substitution(); substitution.setPosition(pos); final byte code = baseSubstitutionCodec.readData(); substitution.setCode(code); readFeatures.add(substitution); break;
/** * Build an array of substitution frequencies for the given CRAM records. * * @param records * CRAM records to scan * @return a 2D array of frequencies, see * {@link htsjdk.samtools.cram.structure.SubstitutionMatrix} */ static long[][] buildFrequencies(final List<CramCompressionRecord> records) { final long[][] frequencies = new long[BYTE_SPACE_SIZE][BYTE_SPACE_SIZE]; for (final CramCompressionRecord record : records) { if (record.readFeatures != null) { for (final ReadFeature readFeature : record.readFeatures) { if (readFeature.getOperator() == Substitution.operator) { final Substitution substitution = ((Substitution) readFeature); final byte refBase = substitution.getReferenceBase(); final byte base = substitution.getBase(); frequencies[refBase][base]++; } } } } return frequencies; }
final boolean isSubstitution = SequenceUtil.isUpperACGTN(readBase) && SequenceUtil.isUpperACGTN(refBase); if (isSubstitution) { features.add(new Substitution(oneBasedPositionInRead, readBase, refBase)); } else { final byte score = qualityScore[i + fromPosInRead];
final byte base = substitutionMatrix.base(refBase, substitution.getCode()); substitution.setBase(base); substitution.setReferenceBase(refBase); bases[posInRead++ - 1] = base; posInSeq++;
/** * Given the records update the substitution matrix with actual substitution * codes. * * @param records * CRAM records * @param substitutionMatrix * the matrix to be updated */ static void updateSubstitutionCodes(final List<CramCompressionRecord> records, final SubstitutionMatrix substitutionMatrix) { for (final CramCompressionRecord record : records) { if (record.readFeatures != null) { for (final ReadFeature recordFeature : record.readFeatures) { if (recordFeature.getOperator() == Substitution.operator) { final Substitution substitution = ((Substitution) recordFeature); if (substitution.getCode() == Substitution.NO_CODE) { final byte refBase = substitution.getReferenceBase(); final byte base = substitution.getBase(); substitution.setCode(substitutionMatrix.code(refBase, base)); } } } } } }