/** * Create a new comparator. * @param sequence Reference sequence to use as basis for comparison. */ public SuffixArrayComparator( byte[] sequence ) { // Processing the suffix array tends to be easier as a string. this.sequence = StringUtil.bytesToString(sequence); }
/**
 * Returns the DNA sequence of this read.
 *
 * @return read sequence as ASCII bytes ACGTN=; {@link SAMRecord#NULL_SEQUENCE} if no bases are present.
 */
public byte[] getReadBases() {
    // A null read string means "no bases": hand back the shared sentinel instead of converting.
    if (readString == null) {
        return SAMRecord.NULL_SEQUENCE;
    }
    return StringUtil.stringToBytes(readString);
}
/**
 * Rewrites the given bases in place into canonical form: every base is passed through
 * {@code StringUtil.toUpperCase}, and a no-call written as '.' is replaced by 'N'.
 *
 * @param bases the bases to normalize; the array itself is modified
 */
static void normalizeBases(final byte[] bases) {
    for (int index = 0; index < bases.length; index++) {
        final byte upperCased = StringUtil.toUpperCase(bases[index]);
        // '.' is the no-call marker; its canonical spelling is 'N'.
        bases[index] = (upperCased == '.') ? (byte) 'N' : upperCased;
    }
}
/**
 * Converts a pair of bases into a DiploidGenotype regardless of base order or case.
 *
 * @param base1 one allele, as an ASCII base (A, C, T or G in either case)
 * @param base2 the other allele, as an ASCII base (A, C, T or G in either case)
 * @return the genotype registered for the pair
 * @throws IllegalArgumentException if either base is not one of A, C, T or G (case insensitive),
 *         or if no genotype is registered for the pair
 */
public static DiploidGenotype fromBases(final byte base1, final byte base2) {
    final byte first = StringUtil.toUpperCase(base1);
    final byte second = StringUtil.toUpperCase(base2);

    // The lookup key is the int SUM of the two bases, which makes it order independent
    // but also lets unrelated byte pairs collide with a valid key
    // (e.g. 'B' + '@' == 'A' + 'A'). Check each base individually first so that such
    // pairs are rejected rather than silently mapped onto a real genotype.
    final String validBases = "ACGT";
    final boolean bothBasesValid = validBases.indexOf(first) >= 0 && validBases.indexOf(second) >= 0;

    final DiploidGenotype genotype = bothBasesValid ? genotypes.get(first + second) : null;
    if (genotype == null) {
        throw new IllegalArgumentException("Unknown genotype string [" +
                StringUtil.bytesToString(new byte[] {base1, base2}) +
                "], any pair of ACTG case insensitive is acceptable");
    }
    return genotype;
}
/**
 * Returns the reverse complement of the given sequence.
 * (The original note credits Reseq as the source of this routine.)
 *
 * @param sequenceData the sequence to reverse-complement
 * @return reverse complement
 */
public static String reverseComplement(final String sequenceData) {
    final byte[] sequenceBytes = htsjdk.samtools.util.StringUtil.stringToBytes(sequenceData);
    // The byte[] overload's return value (if any) is not used; the result is read back
    // from the same array that was passed in.
    reverseComplement(sequenceBytes);
    final String reverseComplemented = htsjdk.samtools.util.StringUtil.bytesToString(sequenceBytes);
    return reverseComplemented;
}
/**
 * Returns the complement of a base given as a {@code char}, by converting it to a byte,
 * delegating to {@code SequenceUtil.complement}, and converting the result back.
 *
 * @param character the base to complement
 * @return the complemented base as a {@code char}
 */
public static char complement(char character) {
    final byte originalBase = htsjdk.samtools.util.StringUtil.charToByte(character);
    final byte complementedBase = SequenceUtil.complement(originalBase);
    return htsjdk.samtools.util.StringUtil.byteToChar(complementedBase);
}
/**
 * Reverse-complements every sequence and base-quality attribute of the record that this
 * method knows about: the read bases, the base qualities, the SQ and E2 tags, and each
 * tag listed in {@code STRING_TAGS_TO_REVERSE}.
 *
 * @param rec the record to update; it is modified through its setters
 */
public static void reverseComplement(final SAMRecord rec) {
    // Primary bases: reverse-complement, then store back on the record.
    final byte[] bases = rec.getReadBases();
    SequenceUtil.reverseComplement(bases);
    rec.setReadBases(bases);

    // Qualities have no complement; they are only reversed to stay aligned with the bases.
    final byte[] quals = rec.getBaseQualities();
    reverseArray(quals);
    rec.setBaseQualities(quals);

    // SQ tag, when present, is stored as a byte array.
    final byte[] sqBytes = (byte[]) rec.getAttribute(SAMTagUtil.getSingleton().SQ);
    if (sqBytes != null) {
        SQTagUtil.reverseComplementSqArray(sqBytes);
        rec.setAttribute(SAMTagUtil.getSingleton().SQ, sqBytes);
    }

    // E2 tag, when present, is stored as a string of bases: round-trip through bytes.
    final String e2Text = (String) rec.getAttribute(SAMTagUtil.getSingleton().E2);
    if (e2Text != null) {
        final byte[] e2Bases = StringUtil.stringToBytes(e2Text);
        SequenceUtil.reverseComplement(e2Bases);
        rec.setAttribute(SAMTagUtil.getSingleton().E2, StringUtil.bytesToString(e2Bases));
    }

    // Remaining string-valued tags are reversed only (no complement).
    for (final short tag : STRING_TAGS_TO_REVERSE) {
        final String tagText = (String) rec.getAttribute(tag);
        if (tagText == null) {
            continue;
        }
        rec.setAttribute(tag, StringUtil.reverseString(tagText));
    }
}
/**
 * Renders this chunk list as a single string, with the chunks joined by ';'.
 *
 * @return the joined representation of {@code chunks}
 */
@Override
public String toString() {
    final String joinedChunks = StringUtil.join(";", chunks);
    return joinedChunks;
}
ret[outIndex++] = StringUtil.charToByte(mg.charAt(0)); curSeqPos++; } else { final byte[] deletedBases = StringUtil.stringToBytes(mg); System.arraycopy(deletedBases, 1, ret, outIndex, deletedBases.length - 1); outIndex += deletedBases.length - 1; ret[outIndex++] = StringUtil.charToByte(c); curSeqPos++;
/**
 * Reads the next line from {@code reader}. When {@code skipBlankLines} is set, lines that
 * {@code StringUtil.isBlank} reports as blank are discarded and reading continues.
 *
 * @return the next (optionally non-blank) line, or {@code null} once the reader returns null
 * @throws IOException if the underlying reader fails
 */
private String readLineConditionallySkippingBlanks() throws IOException {
    String current = reader.readLine();
    // Keep pulling lines only while there is one, skipping is enabled, and it is blank.
    while (current != null && skipBlankLines && StringUtil.isBlank(current)) {
        current = reader.readLine();
    }
    return current;
}
/**
 * Times one million calls of {@code StringUtil.split} on {@code TEXT}, then one million
 * calls of this class's own {@code split} helper (reported as "StringTokenizer"), and
 * prints the elapsed time of each phase to standard output.
 */
public void run() {
    final int iterations = 1000000;
    final String[] tokens = new String[10000];
    final StopWatch timer = new StopWatch();

    // Phase 1: StringUtil.split with a char delimiter.
    timer.start();
    for (int pass = 0; pass < iterations; pass++) {
        // The comparison keeps the call's result in use; the message is not expected to print.
        if (StringUtil.split(TEXT, tokens, '\t') > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
    }
    timer.stop();
    System.out.println("StringUtil.split() took " + timer.getElapsedTime());

    // Phase 2: the local split helper with a String delimiter, timed on a fresh stopwatch state.
    timer.reset();
    timer.start();
    for (int pass = 0; pass < iterations; pass++) {
        if (split(TEXT, tokens, "\t") > 100) {
            System.out.println("Mama Mia that's a lot of tokens!!");
        }
    }
    timer.stop();
    System.out.println("StringTokenizer took " + timer.getElapsedTime());
}
int numFields = StringUtil.split(line, fields, FIELD_SEPARATOR_CHAR); if (numFields == fields.length) { if (StringUtil.splitConcatenateExcessTokens(fields[i], keyAndValue, TAG_KEY_VALUE_SEPARATOR_CHAR) != 2) { reportErrorParsingLine("Problem parsing " + HEADER_LINE_START + mHeaderRecordType + " key:value pair", SAMValidationError.Type.POORLY_FORMATTED_HEADER_TAG, null);
baseQualities = StringUtil.reverseString(baseQualities);
while (sequenceLength > 0 && Character.isWhitespace(StringUtil.byteToChar(bases[sequenceLength - 1]))) { --sequenceLength;
/**
 * Counts the positions, across all alignment blocks of the read, at which the read base and
 * the reference base are not equal according to {@code basesEqual}.
 *
 * This duplicates the byte[] variant of the method, differing only in that the reference is
 * supplied as char[], because GATK needs it in that form.
 *
 * TODO: Remove this method when GATK map method is changed to take refseq as byte[].
 *
 * @param read the aligned read whose bases are compared
 * @param referenceBases reference bases as chars
 * @param referenceOffset value subtracted from each block's zero-based reference start to
 *                        obtain the index into {@code referenceBases}
 * @return the number of mismatching positions
 */
private static int countMismatches(final SAMRecord read, final char[] referenceBases, final int referenceOffset) {
    final byte[] readBases = read.getReadBases();
    int mismatchCount = 0;

    for (final AlignmentBlock block : read.getAlignmentBlocks()) {
        // Block starts are 1-based; convert to 0-based array indices.
        int readIndex = block.getReadStart() - 1;
        int referenceIndex = block.getReferenceStart() - 1 - referenceOffset;
        final int blockLength = block.getLength();

        // Walk the read and the reference in lock-step over the block.
        for (int remaining = blockLength; remaining > 0; --remaining, ++readIndex, ++referenceIndex) {
            if (!basesEqual(readBases[readIndex], StringUtil.charToByte(referenceBases[referenceIndex]))) {
                mismatchCount++;
            }
        }
    }
    return mismatchCount;
}
/**
 * Returns the reverse complement of the given sequence.
 * (The original note credits Reseq as the source of this routine.)
 *
 * @param sequenceData the sequence to reverse-complement
 * @return reverse complement
 */
public static String reverseComplement(final String sequenceData) {
    final byte[] sequenceBytes = htsjdk.samtools.util.StringUtil.stringToBytes(sequenceData);
    // The byte[] overload's return value (if any) is not used; the result is read back
    // from the same array that was passed in.
    reverseComplement(sequenceBytes);
    final String reverseComplemented = htsjdk.samtools.util.StringUtil.bytesToString(sequenceBytes);
    return reverseComplemented;
}
/**
 * Renders this chunk list as a single string, with the chunks joined by ';'.
 *
 * @return the joined representation of {@code chunks}
 */
@Override
public String toString() {
    final String joinedChunks = StringUtil.join(";", chunks);
    return joinedChunks;
}
/**
 * Converts a pair of bases into a DiploidGenotype regardless of base order or case.
 *
 * @param base1 one allele, as an ASCII base (A, C, T or G in either case)
 * @param base2 the other allele, as an ASCII base (A, C, T or G in either case)
 * @return the genotype registered for the pair
 * @throws IllegalArgumentException if either base is not one of A, C, T or G (case insensitive),
 *         or if no genotype is registered for the pair
 */
public static DiploidGenotype fromBases(final byte base1, final byte base2) {
    final byte first = StringUtil.toUpperCase(base1);
    final byte second = StringUtil.toUpperCase(base2);

    // The lookup key is the int SUM of the two bases, which makes it order independent
    // but also lets unrelated byte pairs collide with a valid key
    // (e.g. 'B' + '@' == 'A' + 'A'). Check each base individually first so that such
    // pairs are rejected rather than silently mapped onto a real genotype.
    final String validBases = "ACGT";
    final boolean bothBasesValid = validBases.indexOf(first) >= 0 && validBases.indexOf(second) >= 0;

    final DiploidGenotype genotype = bothBasesValid ? genotypes.get(first + second) : null;
    if (genotype == null) {
        throw new IllegalArgumentException("Unknown genotype string [" +
                StringUtil.bytesToString(new byte[] {base1, base2}) +
                "], any pair of ACTG case insensitive is acceptable");
    }
    return genotype;
}
ret[outIndex++] = StringUtil.charToByte(mg.charAt(0)); curSeqPos++; } else { final byte[] deletedBases = StringUtil.stringToBytes(mg); System.arraycopy(deletedBases, 1, ret, outIndex, deletedBases.length - 1); outIndex += deletedBases.length - 1; ret[outIndex++] = StringUtil.charToByte(c); curSeqPos++;
/**
 * Reads the next line from {@code reader}. When {@code skipBlankLines} is set, lines that
 * {@code StringUtil.isBlank} reports as blank are discarded and reading continues.
 *
 * @return the next (optionally non-blank) line, or {@code null} once the reader returns null
 * @throws IOException if the underlying reader fails
 */
private String readLineConditionallySkippingBlanks() throws IOException {
    String current = reader.readLine();
    // Keep pulling lines only while there is one, skipping is enabled, and it is blank.
    while (current != null && skipBlankLines && StringUtil.isBlank(current)) {
        current = reader.readLine();
    }
    return current;
}