/**
 * Appends the FASTQ representation of a record to the given output.
 * <p>
 * Any field of the record that is {@code null} is written as an empty string. Nothing is
 * appended after the quality string, so no trailing newline is produced.
 *
 * @param out    destination to append to.
 * @param record record to encode.
 * @return the {@link Appendable} returned by the last append call.
 * @throws SAMException if any I/O error occurs.
 */
public static Appendable write(final Appendable out, final FastqRecord record) {
    // fetch every field first, then normalise nulls to the empty string
    final String name = record.getReadName();
    final String bases = record.getReadString();
    final String qualityHeader = record.getBaseQualityHeader();
    final String qualities = record.getBaseQualityString();

    final String nameText = (name != null) ? name : "";
    final String basesText = (bases != null) ? bases : "";
    final String qualityHeaderText = (qualityHeader != null) ? qualityHeader : "";
    final String qualitiesText = (qualities != null) ? qualities : "";

    try {
        // keep following the Appendable returned by each call, exactly as a fluent chain would
        Appendable sink = out.append(FastqConstants.SEQUENCE_HEADER);
        sink = sink.append(nameText);
        sink = sink.append('\n');
        sink = sink.append(basesText);
        sink = sink.append('\n');
        sink = sink.append(FastqConstants.QUALITY_HEADER);
        sink = sink.append(qualityHeaderText);
        sink = sink.append('\n');
        return sink.append(qualitiesText);
    } catch (IOException e) {
        throw new SAMException(e);
    }
}
@Test public void testEmptyRecord() { //Note: this does not blow up now but it will once we enforce non empty fields final FastqRecord record = new FastqRecord(null, (String) null, null, null); // assert how null is handled Assert.assertNull(record.getReadName()); Assert.assertNull(record.getReadString()); Assert.assertNull(record.getBaseQualityString()); Assert.assertEquals(record.getReadBases(), SAMRecord.NULL_SEQUENCE); Assert.assertEquals(record.getBaseQualities(), SAMRecord.NULL_QUALS); // copy the FastqRecord to check that equals and hashCode is working for the null read without blow up final FastqRecord copy = new FastqRecord(record); Assert.assertEquals(record, copy); Assert.assertEquals(record.hashCode(), copy.hashCode()); }
/**
 * Converts a {@link FastqRecord} into a {@link Read}.
 *
 * @param obj the FASTQ record to convert.
 * @return a new {@link Read} built from the record's read name, read string and base quality string.
 */
@Override
public Read to(FastqRecord obj) {
    // getReadName() replaces the deprecated getReadHeader(), which merely delegates to it
    return new Read(obj.getReadName(), obj.getReadString(), obj.getBaseQualityString());
}
/** * Encodes a FastqRecord in the String FASTQ format. */ public static String encode(final FastqRecord record) { // reserve some memory based on the read length int capacity = record.getReadLength() * 2 + 5; // reserve some memory based on the read name if (record.getReadName() != null) { capacity += record.getReadName().length(); } return write(new StringBuilder(capacity), record).toString(); }
/**
 * Writes a record as four lines: the sequence header marker followed by the read name, the read
 * string, the quality header marker followed by the quality header, and the quality string.
 * <p>
 * Fields that are {@code null} are written as empty text rather than the literal "null" that
 * {@code PrintWriter} would otherwise print.
 *
 * @param rec the record to write.
 * @throws SAMException if the underlying writer reports an error.
 */
public void write(final FastqRecord rec) {
    // getReadName() replaces the deprecated getReadHeader(), which merely delegates to it
    final String readName = rec.getReadName();
    final String readString = rec.getReadString();
    final String qualityHeader = rec.getBaseQualityHeader();
    final String qualityString = rec.getBaseQualityString();

    writer.print(FastqConstants.SEQUENCE_HEADER);
    writer.println(readName == null ? "" : readName);
    writer.println(readString == null ? "" : readString);
    writer.print(FastqConstants.QUALITY_HEADER);
    writer.println(qualityHeader == null ? "" : qualityHeader);
    writer.println(qualityString == null ? "" : qualityString);

    // PrintWriter swallows I/O exceptions, so the error flag has to be polled explicitly
    if (writer.checkError()) {
        throw new SAMException("Error in writing fastq file " + path);
    }
}
@Test public void testBasic() { final String seqHeaderPrefix = "FAKE0003 Original version has Solexa scores from 62 to -5 inclusive (in that order)"; final String seqLine = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT"; final String qualHeaderPrefix = ""; final String qualLine = ";<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"; final FastqRecord fastqRecord = new FastqRecord(seqHeaderPrefix, seqLine, qualHeaderPrefix, qualLine); Assert.assertNull(fastqRecord.getBaseQualityHeader()); Assert.assertEquals(fastqRecord.getReadName(), seqHeaderPrefix); Assert.assertEquals(fastqRecord.getBaseQualityString(), qualLine); Assert.assertEquals(fastqRecord.getReadString(), seqLine); Assert.assertNotNull(fastqRecord.toString());//just check not nullness Assert.assertNotEquals(fastqRecord, null); Assert.assertFalse(fastqRecord.equals(null)); Assert.assertNotEquals(null, fastqRecord); Assert.assertEquals(fastqRecord, fastqRecord); Assert.assertNotEquals(fastqRecord, "fred"); Assert.assertNotEquals("fred", fastqRecord); Assert.assertEquals(fastqRecord.getReadLength(), seqLine.length()); Assert.assertEquals(fastqRecord.getBaseQualityString().length(), fastqRecord.getReadString().length()); Assert.assertEquals(fastqRecord.getReadString().length(), fastqRecord.getReadLength()); }
/** Copy-constructing from a null record is expected to raise an IllegalArgumentException. */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testNullCopy() {
    final FastqRecord nothingToCopy = null;
    new FastqRecord(nothingToCopy);
}
/** Two records that differ only in their quality strings must differ in equals and hashCode. */
@Test
public void testNotEqualQuals() {
    final String basesA = "GATTACA";
    final String qualsA = "ABCDEFG";
    final String basesB = basesA;
    // derive a second, different quality string from the bases
    final String qualsB = basesB.replace('A', 'X');

    final FastqRecord recordA = new FastqRecord("header", basesA, "qualHeaderPrefix", qualsA);
    final FastqRecord recordB = new FastqRecord("header", basesB, "qualHeaderPrefix", qualsB);

    // inequality holds in both directions
    Assert.assertNotEquals(recordA, recordB);
    Assert.assertNotEquals(recordB, recordA);

    // same bases, different qualities
    Assert.assertEquals(recordA.getReadString(), recordB.getReadString());
    Assert.assertNotEquals(recordA.getBaseQualityString(), recordB.getBaseQualityString());

    // hash codes differ as well
    Assert.assertNotEquals(recordA.hashCode(), recordB.hashCode());
    Assert.assertNotEquals(recordB.hashCode(), recordA.hashCode());
}
/** The copy constructor yields an equal but distinct record that shares the original's strings. */
@Test
public void testCopy() {
    final String readName = "FAKE0003 Original version has Solexa scores from 62 to -5 inclusive (in that order)";
    final String bases = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT";
    final String emptyQualityHeader = "";
    final String qualities = ";<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";

    final FastqRecord original = new FastqRecord(readName, bases, emptyQualityHeader, qualities);
    final FastqRecord duplicate = new FastqRecord(original);

    // equal by value, but not the same instance
    Assert.assertEquals(original, duplicate);
    Assert.assertNotSame(original, duplicate);

    // the field values themselves are shared by reference
    Assert.assertSame(original.getReadString(), duplicate.getReadString());
    Assert.assertSame(original.getBaseQualityString(), duplicate.getBaseQualityString());
    Assert.assertSame(original.getBaseQualityHeader(), duplicate.getBaseQualityHeader());
}
/**
 * Converts a SAM file into a pair of FASTQ files using the given trimming options and checks that
 * every read string and quality string in both outputs has the expected length.
 *
 * @param samFilename         name of the input file inside the test data directory.
 * @param read1Trim           value passed as READ1_TRIM.
 * @param read1MaxBases       value passed as READ1_MAX_BASES_TO_WRITE.
 * @param expectedRead1Length expected length of every record in the first FASTQ file.
 * @param read2Trim           value passed as READ2_TRIM.
 * @param read2MaxBases       value passed as READ2_MAX_BASES_TO_WRITE.
 * @param expectedRead2Length expected length of every record in the second FASTQ file.
 */
@Test(dataProvider = "trimmedData")
public void testTrimming(final String samFilename, final int read1Trim, final int read1MaxBases, final int expectedRead1Length,
                         final int read2Trim, final int read2MaxBases, final int expectedRead2Length) throws IOException {
    final File samFile = new File(TEST_DATA_DIR, samFilename);
    final File pair1File = newTempFastqFile("pair1");
    final File pair2File = newTempFastqFile("pair2");
    pair1File.deleteOnExit();
    pair2File.deleteOnExit();

    convertFile(new String[]{
            "INPUT=" + samFile.getAbsolutePath(),
            "FASTQ=" + pair1File.getAbsolutePath(),
            "SECOND_END_FASTQ=" + pair2File.getAbsolutePath(),
            "READ1_TRIM=" + read1Trim,
            "READ1_MAX_BASES_TO_WRITE=" + read1MaxBases,
            "READ2_TRIM=" + read2Trim,
            "READ2_MAX_BASES_TO_WRITE=" + read2MaxBases
    });

    // close each reader deterministically instead of leaving the file handle open
    try (FastqReader firstEnds = new FastqReader(pair1File)) {
        for (final FastqRecord first : firstEnds) {
            Assert.assertEquals(first.getReadString().length(), expectedRead1Length, "Incorrect read length");
            Assert.assertEquals(first.getBaseQualityString().length(), expectedRead1Length, "Incorrect quality string length");
        }
    }

    try (FastqReader secondEnds = new FastqReader(pair2File)) {
        for (final FastqRecord second : secondEnds) {
            Assert.assertEquals(second.getReadString().length(), expectedRead2Length, "Incorrect read length");
            Assert.assertEquals(second.getBaseQualityString().length(), expectedRead2Length, "Incorrect quality string length");
        }
    }
}
/**
 * Aligns every read of a FASTQ file against the reference sequence selected by
 * {@code referenceIndex} and writes one record per read to the output file.
 *
 * @param fastq     FASTQ file containing the reads to align.
 * @param output    SAM or BAM file to write.
 * @param reference indexed FASTA file providing the sequence dictionary and the reference bases.
 * @param threads   not used by this implementation.
 * @throws IOException if closing one of the underlying resources fails.
 */
@Override
public void align(File fastq, File output, File reference, int threads) throws IOException {
    // FASTQ sequence text is ASCII; naming the charset avoids depending on the platform default
    final java.nio.charset.Charset ascii = java.nio.charset.StandardCharsets.US_ASCII;

    try (ReferenceSequenceFile ref = new IndexedFastaSequenceFile(reference)) {
        SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        byte[] bases = ref.getSequence(ref.getSequenceDictionary().getSequence(referenceIndex).getSequenceName()).getBases();

        try (FastqReader reader = new FastqReader(fastq);
             SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output)) {
            for (FastqRecord fqr : reader) {
                final String readString = fqr.getReadString();

                // the aligner and the record each receive their own array, so neither can affect the other
                Alignment aln = aligner.align_smith_waterman(readString.getBytes(ascii), bases);

                SAMRecord r = new SAMRecord(header);
                r.setReadName(fqr.getReadName());
                r.setReferenceIndex(referenceIndex);
                // NOTE(review): the +1 presumably converts a 0-based aligner position to a 1-based SAM start - confirm
                r.setAlignmentStart(aln.getStartPosition() + 1);
                r.setCigarString(aln.getCigar());
                r.setReadBases(readString.getBytes(ascii));
                r.setBaseQualities(SAMUtils.fastqToPhred(fqr.getBaseQualityString()));
                writer.addAlignment(r);
            }
        }
    }
}
}
/** Empty read name and empty quality header are both reported back as null. */
@Test
public void testBasicEmptyHeaderPrefix() {
    final String emptyReadName = "";
    final String bases = "ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT";
    final String emptyQualityHeader = "";
    final String qualities = ";<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";

    final FastqRecord record = new FastqRecord(emptyReadName, bases, emptyQualityHeader, qualities);

    Assert.assertNull(record.getReadName());
    Assert.assertNull(record.getBaseQualityHeader());
}
/**
 * Averages the values that {@code SAMUtils.fastqToPhred} produces for the record's base quality string.
 *
 * @param fqr the record whose qualities are averaged.
 * @return the sum of the converted values divided by the length of the quality string.
 */
private static double averageBaseQuality(FastqRecord fqr) {
    final byte[] phredScores = SAMUtils.fastqToPhred(fqr.getBaseQualityString());

    long total = 0;
    for (int i = 0; i < phredScores.length; i++) {
        total += phredScores[i];
    }

    // the divisor is the length of the quality string itself
    final int count = fqr.getBaseQualityString().length();
    return (double) total / count;
}
}
/**
 * Asserts that a FASTQ record carries the expected read name and matches the SAM record in base
 * qualities, read bases and quality header (compared with the SAM record's CO attribute).
 */
private void testRecord(final String expectedReadName, final FastqRecord fastqRecord, final SAMRecord samRecord) {
    // read name
    final String actualReadName = fastqRecord.getReadName();
    Assert.assertEquals(actualReadName, expectedReadName);

    // qualities and bases
    Assert.assertEquals(fastqRecord.getBaseQualities(), samRecord.getBaseQualities());
    Assert.assertEquals(fastqRecord.getReadBases(), samRecord.getReadBases());

    // quality header against the CO tag
    final String actualQualityHeader = fastqRecord.getBaseQualityHeader();
    final String expectedQualityHeader = samRecord.getStringAttribute(SAMTag.CO.name());
    Assert.assertEquals(actualQualityHeader, expectedQualityHeader);
}
/** * Converts a {@link FastqRecord} into a simple unmapped {@link SAMRecord}. * * <p>This method allows to pass a {@link BiConsumer} to add the information from the record in * a customizable manner. * * @param record object to encode. * @param header header for the returned object. * @param custom function to customize encoding. Note that default information might be overriden. */ public static SAMRecord asSAMRecord(final FastqRecord record, final SAMFileHeader header, final BiConsumer<FastqRecord, SAMRecord> custom) { // construct the SAMRecord and set the unmapped flag final SAMRecord samRecord = new SAMRecord(header); samRecord.setReadUnmappedFlag(true); // get the read name from the FastqRecord correctly formatted final String readName = SequenceUtil.getSamReadNameFromFastqHeader(record.getReadName()); // set the basic information from the FastqRecord samRecord.setReadName(readName); samRecord.setReadBases(record.getReadBases()); samRecord.setBaseQualities(record.getBaseQualities()); custom.accept(record, samRecord); return samRecord; }
@Test public void testAsSAMRecord() throws Exception { // create a random record final SAMRecord samRecord = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "10M", null, 2); FastqRecord fastqRecord = new FastqRecord(samRecord.getReadName(), samRecord.getReadBases(), "", samRecord.getBaseQualities()); testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.FIRST_OF_PAIR, samRecord.getReadBases(), "", samRecord.getBaseQualities()); testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.SECOND_OF_PAIR, samRecord.getReadBases(), "", samRecord.getBaseQualities()); testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); fastqRecord = new FastqRecord(samRecord.getReadName() + FastqConstants.SECOND_OF_PAIR, samRecord.getReadBases(), "Quality header comment", samRecord.getBaseQualities()); // default method does not include the comment header testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader()), samRecord); // test with qualityHeaderToComment=true populates the CO tag samRecord.setAttribute(SAMTag.CO.name(), fastqRecord.getBaseQualityHeader()); testConvertedSAMRecord(FastqEncoder.asSAMRecord(fastqRecord, samRecord.getHeader(), FastqEncoder.QUALITY_HEADER_TO_COMMENT_TAG), samRecord); }
/** A record built with an empty read name must not equal one with a real read name. */
@Test
public void testEqualsWithNullHeader() {
    final String bases = "GATTACA";
    final String emptyQualityHeader = "";
    final String qualities = "ABCDEFG";

    final FastqRecord unnamed = new FastqRecord("", bases, emptyQualityHeader, qualities);
    final FastqRecord named = new FastqRecord("header", bases, emptyQualityHeader, qualities);

    // inequality in both directions
    Assert.assertNotEquals(unnamed, named);
    Assert.assertNotEquals(named, unnamed);

    // hash codes differ between the two records
    Assert.assertNotEquals(unnamed.hashCode(), named.hashCode());
    Assert.assertNotEquals(named.hashCode(), unnamed.hashCode());

    // and are stable for each record
    Assert.assertEquals(unnamed.hashCode(), unnamed.hashCode());
    Assert.assertEquals(named.hashCode(), named.hashCode());
}
/**
 * Creates a simple SAM file from a single fastq file.
 *
 * @param freader source of the FASTQ records; consumed until it has no more records.
 * @param writer  destination for the created records; its file header is used for each record.
 * @return the number of records read from {@code freader}.
 */
protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    while (freader.hasNext()) {
        final FastqRecord frec = freader.next();
        // getReadName() replaces the deprecated getReadHeader(), which merely delegates to it
        final String readName = SequenceUtil.getSamReadNameFromFastqHeader(frec.getReadName());
        final SAMRecord srec = createSamRecord(writer.getFileHeader(), readName, frec, false);
        srec.setReadPairedFlag(false);
        writer.addAlignment(srec);
        progress.record(srec);
        readCount++;
    }
    return readCount;
}
/**
 * Legacy accessor that forwards to {@link #getReadName()}.
 *
 * @return the read name
 * @deprecated since 02/2017. Use {@link #getReadName()} instead
 */
@Deprecated
public String getReadHeader() {
    return this.getReadName();
}
/**
 * Legacy accessor for the read length; forwards to {@link #getReadLength()}.
 *
 * @return the value reported by {@link #getReadLength()}
 * @deprecated since 02/2017. Use {@link #getReadLength()} instead
 */
@Deprecated
public int length() {
    return this.getReadLength();
}