/** * Create a new comparator. * @param sequence Reference sequence to use as basis for comparison. */ public SuffixArrayComparator( byte[] sequence ) { // Processing the suffix array tends to be easier as a string. this.sequence = StringUtil.bytesToString(sequence); }
/**
 * Renders the bases of this ReferenceSequence as a String.
 *
 * <p>The bases are copied and widened to two-byte characters, so avoid calling this on very long
 * reference sequences; it is meant as a convenience for short sequences such as those returned by
 * {@link ReferenceSequenceFile#getSubsequenceAt(String, long, long)}.
 *
 * @return the bases of this ReferenceSequence, as a String
 */
public String getBaseString() {
    final String baseString = StringUtil.bytesToString(bases);
    return baseString;
}
/**
 * Converts a region of a byte array into a String, mapping every byte to the character with the same
 * unsigned value (ISO-8859-1 decoding, so bytes 0x80-0xFF become U+0080-U+00FF).
 *
 * @param buffer bytes to convert
 * @param offset index of the first byte to convert
 * @param length number of bytes to convert
 * @return a String containing {@code length} characters, one per input byte
 * @throws IndexOutOfBoundsException if {@code offset} and {@code length} do not describe a valid
 *                                   range within {@code buffer}
 */
public static String bytesToString(final byte[] buffer, final int offset, final int length) {
    // ISO-8859-1 yields exactly the mapping of the deprecated String(byte[], hibyte = 0, offset, count)
    // constructor (char = byte & 0xFF) without relying on a deprecated API. The charset is fully
    // qualified so that no additional import is required.
    return new String(buffer, offset, length, java.nio.charset.StandardCharsets.ISO_8859_1);
}
/**
 * Converts a region of a byte array into a String, mapping every byte to the character with the same
 * unsigned value (ISO-8859-1 decoding, so bytes 0x80-0xFF become U+0080-U+00FF).
 *
 * @param buffer bytes to convert
 * @param offset index of the first byte to convert
 * @param length number of bytes to convert
 * @return a String containing {@code length} characters, one per input byte
 * @throws IndexOutOfBoundsException if {@code offset} and {@code length} do not describe a valid
 *                                   range within {@code buffer}
 */
public static String bytesToString(final byte[] buffer, final int offset, final int length) {
    // ISO-8859-1 yields exactly the mapping of the deprecated String(byte[], hibyte = 0, offset, count)
    // constructor (char = byte & 0xFF) without relying on a deprecated API. The charset is fully
    // qualified so that no additional import is required.
    return new String(buffer, offset, length, java.nio.charset.StandardCharsets.ISO_8859_1);
}
/** Formats this object as {@code Bait{name=<name>, bases=<bases as a String>}}. */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("Bait{");
    text.append("name=").append(getName());
    text.append(", bases=").append(StringUtil.bytesToString(bases));
    text.append('}');
    return text.toString();
}
/**
 * Returns the read bases in String form; a read with no bases yields {@code NULL_SEQUENCE_STRING}.
 *
 * @return read sequence as a string of ACGTN=.
 */
public String getReadString() {
    final byte[] sequenceBytes = getReadBases();
    return sequenceBytes.length == 0
            ? NULL_SEQUENCE_STRING
            : StringUtil.bytesToString(sequenceBytes);
}
/**
 * Constructor for byte[] arrays.
 *
 * <p>Delegates to another constructor of this class, passing the read bases converted with
 * {@code StringUtil.bytesToString} and the base qualities converted with
 * {@code SAMUtils.phredToFastq}; the read name and quality header are passed through unchanged.
 *
 * @param readName      the read name (without {@link FastqConstants#SEQUENCE_HEADER})
 * @param readBases     the read sequence bases as ASCII bytes ACGTN=.
 * @param qualityHeader the quality header (without {@link FastqConstants#SEQUENCE_HEADER})
 * @param baseQualities the base qualities as binary PHRED scores (not ASCII)
 */
public FastqRecord(final String readName, final byte[] readBases, final String qualityHeader, final byte[] baseQualities) {
    // Both byte arrays are converted up front so the delegated-to constructor does the actual field setup.
    this(readName, StringUtil.bytesToString(readBases), qualityHeader, SAMUtils.phredToFastq(baseQualities));
}
/**
 * Constructor for byte[] arrays.
 *
 * <p>Delegates to another constructor of this class, passing the read bases converted with
 * {@code StringUtil.bytesToString} and the base qualities converted with
 * {@code SAMUtils.phredToFastq}; the read name and quality header are passed through unchanged.
 *
 * @param readName      the read name (without {@link FastqConstants#SEQUENCE_HEADER})
 * @param readBases     the read sequence bases as ASCII bytes ACGTN=.
 * @param qualityHeader the quality header (without {@link FastqConstants#SEQUENCE_HEADER})
 * @param baseQualities the base qualities as binary PHRED scores (not ASCII)
 */
public FastqRecord(final String readName, final byte[] readBases, final String qualityHeader, final byte[] baseQualities) {
    // Both byte arrays are converted up front so the delegated-to constructor does the actual field setup.
    this(readName, StringUtil.bytesToString(readBases), qualityHeader, SAMUtils.phredToFastq(baseQualities));
}
/**
 * Returns the read bases in String form; a read with no bases yields {@code NULL_SEQUENCE_STRING}.
 *
 * @return read sequence as a string of ACGTN=.
 */
public String getReadString() {
    final byte[] sequenceBytes = getReadBases();
    return sequenceBytes.length == 0
            ? NULL_SEQUENCE_STRING
            : StringUtil.bytesToString(sequenceBytes);
}
/**
 * Collects, as Strings, every k-mer of length {@code 2 * contextSize + 1} whose middle base is 'C',
 * and logs how many were produced.
 *
 * @param contextSize number of bases on each side of the middle base
 * @return the set of matching k-mers
 */
private Set<String> makeContextStrings(final int contextSize) {
    final int kmerLength = 2 * contextSize + 1;
    final Set<String> contextStrings = new HashSet<>();

    for (final byte[] candidate : SequenceUtil.generateAllKmers(kmerLength)) {
        // Only k-mers centred on a C are of interest.
        if (candidate[contextSize] != 'C') {
            continue;
        }
        contextStrings.add(StringUtil.bytesToString(candidate));
    }

    log.info("Generated " + contextStrings.size() + " context strings.");
    return contextStrings;
}
/**
 * Collects, as Strings, every k-mer of length {@code 2 * contextSize + 1} whose middle base is 'C',
 * and logs how many were produced.
 *
 * @param contextSize number of bases on each side of the middle base
 * @return the set of matching k-mers
 */
private Set<String> makeContextStrings(final int contextSize) {
    final int kmerLength = 2 * contextSize + 1;
    final Set<String> contextStrings = new HashSet<>();

    for (final byte[] candidate : SequenceUtil.generateAllKmers(kmerLength)) {
        // Only k-mers centred on a C are of interest.
        if (candidate[contextSize] != 'C') {
            continue;
        }
        contextStrings.add(StringUtil.bytesToString(candidate));
    }

    log.info("Generated " + contextStrings.size() + " context strings.");
    return contextStrings;
}
/**
 * Converts a pair of bases into a DiploidGenotype regardless of base order or case.
 *
 * <p>The genotype table is looked up by the integer sum of the two upper-cased bases. Because a sum
 * does not identify its operands, both bases are first checked to be one of A, C, G or T; without
 * that check a pair of other bytes whose sum equals that of a valid pair (for example
 * {@code 'B' + 'B' == 'A' + 'C'}) would be silently accepted.
 *
 * @param base1 first base, upper or lower case
 * @param base2 second base, upper or lower case
 * @return the genotype registered for the pair
 * @throws IllegalArgumentException if either base is not one of ACGT (case insensitive) or no
 *                                  genotype is registered for the pair
 */
public static DiploidGenotype fromBases(final byte base1, final byte base2) {
    final byte first = StringUtil.toUpperCase(base1);
    final byte second = StringUtil.toUpperCase(base2);

    // Negative bytes widen to negative ints, for which indexOf returns -1, so they are rejected too.
    final boolean bothBasesValid = "ACGT".indexOf(first) >= 0 && "ACGT".indexOf(second) >= 0;

    final DiploidGenotype genotype = bothBasesValid ? genotypes.get(first + second) : null;
    if (genotype == null) {
        throw new IllegalArgumentException("Unknown genotype string [" +
                StringUtil.bytesToString(new byte[] {base1, base2}) +
                "], any pair of ACTG case insensitive is acceptable");
    }
    return genotype;
}
/**
 * Converts a pair of bases into a DiploidGenotype regardless of base order or case.
 *
 * <p>The genotype table is looked up by the integer sum of the two upper-cased bases. Because a sum
 * does not identify its operands, both bases are first checked to be one of A, C, G or T; without
 * that check a pair of other bytes whose sum equals that of a valid pair (for example
 * {@code 'B' + 'B' == 'A' + 'C'}) would be silently accepted.
 *
 * @param base1 first base, upper or lower case
 * @param base2 second base, upper or lower case
 * @return the genotype registered for the pair
 * @throws IllegalArgumentException if either base is not one of ACGT (case insensitive) or no
 *                                  genotype is registered for the pair
 */
public static DiploidGenotype fromBases(final byte base1, final byte base2) {
    final byte first = StringUtil.toUpperCase(base1);
    final byte second = StringUtil.toUpperCase(base2);

    // Negative bytes widen to negative ints, for which indexOf returns -1, so they are rejected too.
    final boolean bothBasesValid = "ACGT".indexOf(first) >= 0 && "ACGT".indexOf(second) >= 0;

    final DiploidGenotype genotype = bothBasesValid ? genotypes.get(first + second) : null;
    if (genotype == null) {
        throw new IllegalArgumentException("Unknown genotype string [" +
                StringUtil.bytesToString(new byte[] {base1, base2}) +
                "], any pair of ACTG case insensitive is acceptable");
    }
    return genotype;
}
/**
 * Builds the bait sequence as a String: left primer (if set), the bait's bases, right primer (if set).
 *
 * @param bait bait whose bases form the middle of the sequence
 * @param rc   when true, the assembled sequence is reverse complemented before being returned
 * @return the assembled sequence, reverse complemented if requested
 */
private String getBaitSequence(final Bait bait, final boolean rc) {
    // A null primer contributes nothing to the sequence.
    final String withPrimers = (LEFT_PRIMER != null ? LEFT_PRIMER : "")
            + StringUtil.bytesToString(bait.getBases())
            + (RIGHT_PRIMER != null ? RIGHT_PRIMER : "");

    return rc ? SequenceUtil.reverseComplement(withPrimers) : withPrimers;
}
/**
 * Computes the reverse complement of a sequence given as a String.
 * (Stolen from Reseq)
 *
 * <p>The String is converted to bytes, handed to the byte[] overload of {@code reverseComplement},
 * and the resulting bytes are converted back to a String.
 *
 * @param sequenceData sequence to reverse complement
 * @return reverse complement
 */
public static String reverseComplement(final String sequenceData) {
    final byte[] sequenceBytes = htsjdk.samtools.util.StringUtil.stringToBytes(sequenceData);
    reverseComplement(sequenceBytes);
    final String reverseComplemented = htsjdk.samtools.util.StringUtil.bytesToString(sequenceBytes);
    return reverseComplemented;
}
/**
 * Builds the bait sequence as a String: left primer (if set), the bait's bases, right primer (if set).
 *
 * @param bait bait whose bases form the middle of the sequence
 * @param rc   when true, the assembled sequence is reverse complemented before being returned
 * @return the assembled sequence, reverse complemented if requested
 */
private String getBaitSequence(final Bait bait, final boolean rc) {
    // A null primer contributes nothing to the sequence.
    final String withPrimers = (LEFT_PRIMER != null ? LEFT_PRIMER : "")
            + StringUtil.bytesToString(bait.getBases())
            + (RIGHT_PRIMER != null ? RIGHT_PRIMER : "");

    return rc ? SequenceUtil.reverseComplement(withPrimers) : withPrimers;
}
/**
 * Computes the reverse complement of a sequence given as a String.
 * (Stolen from Reseq)
 *
 * <p>The String is converted to bytes, handed to the byte[] overload of {@code reverseComplement},
 * and the resulting bytes are converted back to a String.
 *
 * @param sequenceData sequence to reverse complement
 * @return reverse complement
 */
public static String reverseComplement(final String sequenceData) {
    final byte[] sequenceBytes = htsjdk.samtools.util.StringUtil.stringToBytes(sequenceData);
    reverseComplement(sequenceBytes);
    final String reverseComplemented = htsjdk.samtools.util.StringUtil.bytesToString(sequenceBytes);
    return reverseComplemented;
}
/**
 * Computes the reverse complement of a sequence given as a String.
 * (Stolen from Reseq)
 *
 * <p>The String is converted to bytes, handed to the byte[] overload of {@code reverseComplement},
 * and the resulting bytes are converted back to a String.
 *
 * @param sequenceData sequence to reverse complement
 * @return reverse complement
 */
public static String reverseComplement(final String sequenceData) {
    final byte[] sequenceBytes = htsjdk.samtools.util.StringUtil.stringToBytes(sequenceData);
    reverseComplement(sequenceBytes);
    final String reverseComplemented = htsjdk.samtools.util.StringUtil.bytesToString(sequenceBytes);
    return reverseComplemented;
}
/**
 * Fetches the first {@code firstBasesOfChrM.length()} bases of chrM, checks the returned name,
 * contig index and bases, closes the file, and prints how long the fetch took.
 */
@Test(dataProvider="homosapiens")
public void testFirstSequence(AbstractIndexedFastaSequenceFile sequenceFile) {
    // Only the subsequence fetch itself is timed.
    final long fetchStarted = System.currentTimeMillis();
    final ReferenceSequence fetched = sequenceFile.getSubsequenceAt("chrM", 1, firstBasesOfChrM.length());
    final long fetchFinished = System.currentTimeMillis();
    final long elapsedMillis = fetchFinished - fetchStarted;

    Assert.assertEquals(fetched.getName(), "chrM", "Sequence contig is not correct");
    Assert.assertEquals(fetched.getContigIndex(), 0, "Sequence contig index is not correct");
    final String fetchedBases = StringUtil.bytesToString(fetched.getBases());
    Assert.assertEquals(fetchedBases, firstBasesOfChrM, "First n bases of chrM are incorrect");

    CloserUtil.close(sequenceFile);

    System.err.printf("testFirstSequence runtime: %dms%n", elapsedMillis);
}
/**
 * Builds a SAMRecord from the supplied read string, CIGAR and MD tag, then checks that
 * {@code SequenceUtil.makeReferenceFromAlignment} produces the expected reference bases.
 */
@Test(dataProvider = "makeReferenceFromAlignment")
public void testMakeReferenceFromAlignment(final String seq, final String cigar, final String md,
                                           boolean includeReferenceBasesForDeletions,
                                           final String expectedReference) {
    final SAMRecord alignment = new SAMRecord(null);
    alignment.setReadName("test");
    alignment.setReadString(seq);
    alignment.setCigarString(cigar);
    alignment.setAttribute(SAMTag.MD.name(), md);

    final byte[] reconstructed =
            SequenceUtil.makeReferenceFromAlignment(alignment, includeReferenceBasesForDeletions);
    final String reconstructedReference = StringUtil.bytesToString(reconstructed);

    Assert.assertEquals(reconstructedReference, expectedReference);
}