/**
 * Replaces every element of the given array with the result of
 * {@code upperCase(byte)} for that element.
 *
 * @param bases array to convert; it is modified directly
 * @return the same array instance that was passed in
 */
public static byte[] upperCase(final byte[] bases) {
    final int length = bases.length;
    int index = 0;
    while (index < length) {
        bases[index] = upperCase(bases[index]);
        index++;
    }
    return bases;
}
/**
 * Converts the array in place: each position is overwritten with the value
 * returned by the single-byte {@code upperCase} overload.
 *
 * @param bases array whose contents are rewritten
 * @return {@code bases} itself (no copy is made)
 */
public static byte[] upperCase(final byte[] bases) {
    for (int pos = 0; pos != bases.length; ++pos) {
        final byte converted = upperCase(bases[pos]);
        bases[pos] = converted;
    }
    return bases;
}
/**
 * Maps a base to a char: optionally passes it through
 * {@code SequenceUtil.complement}, then through {@code SequenceUtil.upperCase},
 * and casts the result to {@code char}.
 *
 * @param input         the base to convert
 * @param getComplement when true the base is complemented first; a null value
 *                      fails on unboxing
 * @return the converted base as a char
 */
private static char stratifySequenceBase(final byte input, final Boolean getComplement) {
    final byte strandBase;
    if (getComplement) {
        strandBase = SequenceUtil.complement(input);
    } else {
        strandBase = input;
    }
    return (char) SequenceUtil.upperCase(strandBase);
}
/**
 * Returns the char form of a base after optional complementing and
 * upper-casing via {@code SequenceUtil}.
 *
 * @param input         base to convert
 * @param getComplement whether to apply {@code SequenceUtil.complement} before
 *                      upper-casing (unboxed, so it must not be null)
 * @return result of {@code SequenceUtil.upperCase} cast to {@code char}
 */
private static char stratifySequenceBase(final byte input, final Boolean getComplement) {
    final byte oriented = getComplement ? SequenceUtil.complement(input) : input;
    final byte upperCased = SequenceUtil.upperCase(oriented);
    return (char) upperCased;
}
/**
 * Builds a sanitized copy of a sequence: the copy is passed through
 * {@code SequenceUtil.upperCase}, and every byte that
 * {@code SequenceUtil.isValidBase} rejects is replaced with {@code 'N'}.
 * The input array itself is left untouched.
 * Per the original note, this exists so that an unexpected character does not
 * crash the JVM.
 *
 * @param seq sequence
 * @return a new array of the same length holding the sanitized bases
 */
private static byte[] clean(final byte[] seq) {
    final byte[] copy = Arrays.copyOf(seq, seq.length);
    final byte[] cleaned = htsjdk.samtools.util.SequenceUtil.upperCase(copy);
    for (int pos = 0; pos < seq.length; pos++) {
        if (htsjdk.samtools.util.SequenceUtil.isValidBase(cleaned[pos])) {
            continue;
        }
        cleaned[pos] = 'N';
    }
    return cleaned;
}
}
/**
 * Checks {@code SequenceUtil.upperCase(byte[])} on three inputs: upper-case
 * letters stay as they are, lower-case letters become upper-case, and digits
 * and punctuation are left unchanged.
 */
@Test
public void testUpperCase() {
    final String upperAlphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    final String digitsAndSymbols = "1234567890!@#$%^&*()";

    // Each row is {input, expected output}.
    final String[][] inputAndExpected = {
        {upperAlphabet, upperAlphabet},
        {"abcdefghijklmnopqrstuvwxyz", upperAlphabet},
        {digitsAndSymbols, digitsAndSymbols},
    };

    for (final String[] pair : inputAndExpected) {
        final byte[] actual = SequenceUtil.upperCase(StringUtil.stringToBytes(pair[0]));
        final byte[] expected = StringUtil.stringToBytes(pair[1]);
        Assert.assertEquals(actual, expected);
    }
}
/**
 * Verifies that the bases returned by {@code ReferenceSource.getReferenceBases}
 * differ from the mixed-case bases stored in the backing in-memory reference
 * and equal their upper-cased form.
 */
@Test
public void testReferenceSourceUpperCasesBases() {
    final String sequenceName = "1";
    final String nonIupacCharacters = "1=eE";
    // Explicit charset: the no-arg getBytes() depends on the platform default encoding.
    final byte[] originalRefBases = (nonIupacCharacters + SequenceUtil.getIUPACCodesString())
            .getBytes(java.nio.charset.StandardCharsets.UTF_8);

    final SAMSequenceRecord sequenceRecord = new SAMSequenceRecord(sequenceName, originalRefBases.length);
    final InMemoryReferenceSequenceFile memoryReferenceSequenceFile = new InMemoryReferenceSequenceFile();
    // The reference file receives its own copy so originalRefBases remains an independent baseline.
    memoryReferenceSequenceFile.add(sequenceName, Arrays.copyOf(originalRefBases, originalRefBases.length));
    Assert.assertEquals(memoryReferenceSequenceFile.getSequence(sequenceName).getBases(), originalRefBases);

    final ReferenceSource referenceSource = new ReferenceSource(memoryReferenceSequenceFile);
    final byte[] refBasesFromSource = referenceSource.getReferenceBases(sequenceRecord, false);

    Assert.assertNotEquals(refBasesFromSource, originalRefBases);
    // Upper-case a copy rather than the baseline itself, in case upperCase rewrites its argument in place.
    final byte[] expectedUpperCased =
            SequenceUtil.upperCase(Arrays.copyOf(originalRefBases, originalRefBases.length));
    Assert.assertEquals(refBasesFromSource, expectedUpperCased);
}
}
private CramTestCase() { referenceBases = SequenceUtil.getIUPACCodesString().getBytes(); referenceBasesUpperCased = SequenceUtil.upperCase(Arrays.copyOf(referenceBases, referenceBases.length));