/**
 * Decompresses the first chunk of the BGZF file with CRC checking enabled and
 * reports success. Any corruption surfaces as an IOException from the read.
 *
 * @return 0 on success.
 * @throws IOException if the file cannot be read or fails CRC validation.
 */
public int execute() throws IOException {
    final byte[] uncompressedBuffer = new byte[65536];
    // try-with-resources: the original leaked the stream if read() threw.
    try (BlockCompressedInputStream gunzipper = new BlockCompressedInputStream(blockFile)) {
        gunzipper.setCheckCrcs(true);
        gunzipper.read(uncompressedBuffer);
    }
    System.out.printf("SUCCESS!%n");
    return 0;
}
@Override protected int readFromPosition(final ByteBuffer buffer, final long position) throws IOException { // old position to get back final long oldPos = stream.getFilePointer(); try { final long virtualOffset = gzindex.getVirtualOffsetForSeek(position); stream.seek(virtualOffset); final byte[] array = new byte[buffer.remaining()]; final int read = stream.read(array); buffer.put(array); return read; } finally { stream.seek(oldPos); } }
/**
 * Read up to <code>len</code> bytes from the stream, but no further than the end of the
 * compressed block. If at the end of the block then no bytes will be read and a return
 * value of -2 will be returned; on the next call to read, bytes from the next block
 * will be returned. This is the same contract as CBZip2InputStream in Hadoop.
 *
 * @param b   destination buffer
 * @param off offset in {@code b} at which bytes are stored
 * @param len maximum number of bytes to read
 * @return int The return value greater than 0 are the bytes read. A value
 *         of -1 means end of stream while -2 represents end of block.
 */
private int readWithinBlock(byte[] b, int off, int len) throws IOException {
    if (input.endOfBlock()) {
        // NOTE: available() has a side effect at a block boundary -- it advances
        // the underlying stream into the next block, so it must be called before
        // getPosition() below.
        final int available = input.available(); // this will read the next block, if there is one
        // Record the compressed offset (upper 48 bits of the BGZF virtual file
        // pointer) of the block just completed.
        processedPosition = input.getPosition() >> 16;
        if (available == 0) {
            // end of stream
            return -1;
        }
        return END_OF_BLOCK;
    }
    // return up to end of block (at most)
    int available = input.available();
    return input.read(b, off, Math.min(available, len));
}
@Override protected boolean advanceToNextRecordStart() throws IOException { // Advance to next file block if necessary while (mCompressedInputStream.getFilePointer() >= mFilePointerLimit) { if (mFilePointers == null || mFilePointerIndex >= mFilePointers.length) { return false; } final long startOffset = mFilePointers[mFilePointerIndex++]; final long endOffset = mFilePointers[mFilePointerIndex++]; mCompressedInputStream.seek(startOffset); mFilePointerLimit = endOffset; } return true; } }
/**
 * @param stream stream.markSupported() must be true
 * @return true if this looks like a BAM file.
 */
private boolean isBAMFile(final InputStream stream) throws IOException {
    if (!BlockCompressedInputStream.isValidFile(stream)) {
        return false;
    }
    // Buffer one whole compressed block so the BAM magic can be inspected
    // without disturbing the caller's stream position (mark/reset).
    final int buffSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    stream.mark(buffSize);
    final byte[] buffer = new byte[buffSize];
    readBytes(stream, buffer, 0, buffSize);
    stream.reset();
    final byte[] magicBuf = new byte[4];
    // try-with-resources: the original never closed the in-memory decompressor.
    try (BlockCompressedInputStream bcis =
                 new BlockCompressedInputStream(new ByteArrayInputStream(buffer))) {
        final int magicLength = readBytes(bcis, magicBuf, 0, 4);
        return magicLength == BAMFileConstants.BAM_MAGIC.length
                && Arrays.equals(BAMFileConstants.BAM_MAGIC, magicBuf);
    }
}
this.bgzf = new BlockCompressedInputStream(this.in); this.bgzf.setCheckCrcs(true); final long cp0Virt = (long)cp0 << 16; try { bgzf.seek(cp0Virt); bgzf.seek(cp0Virt | up0); boolean decodedAny = false; try { final int cp2 = (int)(bgzf.getFilePointer() >>> 16); if (cp2 != prevCP) {
new BlockCompressedInputStream(this.in); bgzf.setCheckCrcs(true); bgzf.seek((long)pos << 16);
/**
 * Sanity check: decompress up to 100 bytes starting at the given BGZF block,
 * with CRC verification enabled. Corruption surfaces as an IOException.
 *
 * @param blockStart compressed-file offset of the block; shifted into the
 *                   upper 48 bits to form a virtual file pointer.
 * @throws IOException if seeking or decompressing fails.
 */
private void canReadFromBlockStart(long blockStart) throws IOException {
    // try-with-resources: the original never closed the stream.
    try (BlockCompressedInputStream blockCompressedInputStream =
                 new BlockCompressedInputStream(file)) {
        blockCompressedInputStream.setCheckCrcs(true);
        blockCompressedInputStream.seek(blockStart << 16);
        final byte[] b = new byte[100];
        blockCompressedInputStream.read(b);
    }
}
}
/**
 * Checks whether the given file is a readable BGZF-compressed BAM by
 * validating the BGZF header and then the decompressed "BAM\1" magic.
 *
 * @param file candidate file.
 * @return true if the file decompresses to the BAM magic bytes.
 */
@Override
public boolean canRead(File file) {
    final byte[] BAM_MAGIC = "BAM\1".getBytes();
    final byte[] buffer = new byte[BAM_MAGIC.length];
    // try-with-resources: the original leaked fstream when isValidFile()
    // returned false, and leaked BCIS when read() threw.
    try (InputStream fstream = new BufferedInputStream(new FileInputStream(file))) {
        if (!BlockCompressedInputStream.isValidFile(fstream)) {
            return false;
        }
        try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(fstream)) {
            bcis.read(buffer, 0, BAM_MAGIC.length);
        }
        return Arrays.equals(buffer, BAM_MAGIC);
    } catch (IOException | htsjdk.samtools.FileTruncatedException e) {
        // Unreadable or truncated input simply means "not a BAM we can read".
        return false;
    }
}
}
/**
 * Convenient ctor that opens the file, wraps it with a BGZF reader, and closes after reading index.
 *
 * @param tabixFile BGZF-compressed tabix index file to load.
 * @throws IOException if the file cannot be opened or parsed.
 */
public TabixIndex(final File tabixFile) throws IOException {
    this(new BlockCompressedInputStream(tabixFile), true);
}
/**
 * Verifies that decompression yields the exact original bytes across BGZF
 * block boundaries, ends exactly at a block boundary, and then reports EOF.
 */
@Test
public void decompression_should_cross_block_boundries() throws Exception {
    final byte[] uncompressed = Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath());
    try (BlockCompressedInputStream stream =
                 new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) {
        final byte[] decompressed = new byte[uncompressed.length];
        // Fix: read(byte[]) may return fewer bytes than requested, which would
        // leave the tail of `decompressed` zeroed and fail the comparison
        // spuriously. Loop until the buffer is full or EOF.
        int total = 0;
        while (total < decompressed.length) {
            final int n = stream.read(decompressed, total, decompressed.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        Assert.assertEquals(total, decompressed.length);
        Assert.assertEquals(decompressed, uncompressed);
        Assert.assertTrue(stream.endOfBlock());
        Assert.assertEquals(stream.read(), -1);
    }
}

@Test
/**
 * Verifies that available() reports exactly the number of unread bytes left
 * in the first block, decreasing by one per single-byte read.
 */
@Test
public void available_should_return_number_of_bytes_left_in_current_block() throws Exception {
    try (BlockCompressedInputStream stream = new BlockCompressedInputStream(BLOCK_COMPRESSED)) {
        final int blockEnd = BLOCK_UNCOMPRESSED_END_POSITIONS[0];
        int consumed = 0;
        // After each one-byte read, available() must shrink by exactly one.
        while (consumed < blockEnd) {
            Assert.assertEquals(stream.available(), blockEnd - consumed);
            stream.read();
            consumed++;
        }
    }
}
/**
 * A freshly opened stream must report a virtual file pointer of zero.
 */
@Test
public void testFilePointer() throws Exception {
    // try-with-resources: the original leaked the stream when the assertion failed.
    try (BlockCompressedInputStream sync = new BlockCompressedInputStream(BAM_FILE)) {
        Assert.assertEquals(sync.getFilePointer(), 0);
    }
}
}
/**
 * Decodes the BAM record found at each virtual offset recorded in the
 * splitting index.
 *
 * @param bam   BGZF-compressed BAM file to read from.
 * @param index splitting index supplying virtual offsets.
 * @return one record per offset that decoded to a non-null SAMRecord.
 * @throws IOException on seek or read failure.
 */
private List<SAMRecord> getRecordsAtSplits(File bam, SplittingBAMIndex index) throws IOException {
    final List<SAMRecord> records = new ArrayList<>();
    final BAMRecordCodec codec = new BAMRecordCodec(samFileHeader);
    // try-with-resources: the original never closed the compressed stream.
    try (BlockCompressedInputStream bci = new BlockCompressedInputStream(bam)) {
        codec.setInputStream(bci);
        for (final Long offset : index.getVirtualOffsets()) {
            bci.seek(offset);
            final SAMRecord record = codec.decode();
            if (record != null) {
                records.add(record);
            }
        }
    }
    return records;
}
/**
 * Builds a .fai FASTA index next to each BGZF-compressed FASTA file named on
 * the command line.
 *
 * @param args command-line arguments parsed into {@code Params} by JCommander.
 * @throws IOException if a file cannot be opened or read.
 */
public static void main(String[] args) throws IOException {
    final Params params = new Params();
    final JCommander jc = new JCommander(params);
    jc.parse(args);
    for (final File file : params.files) {
        log.info("Indexing file: " + file.getAbsolutePath());
        // try-with-resources: the original never closed bcis, and leaked the
        // writer when indexing threw mid-loop.
        try (BlockCompressedInputStream bcis =
                     new BlockCompressedInputStream(new SeekableFileStream(file));
             PrintWriter writer = new PrintWriter(file.getAbsolutePath() + ".fai")) {
            // Prime the stream so the first block is loaded before indexing starts.
            bcis.available();
            final BGZF_FastaIndexer mli = new BGZF_FastaIndexer(bcis);
            FAIDX_FastaIndexEntry e;
            // checkError() aborts early if the writer hit an I/O problem.
            while (!writer.checkError() && (e = mli.readNext()) != null) {
                writer.println(e);
            }
        }
    }
}
/**
 * Returns the current BGZF virtual file pointer of the underlying compressed
 * stream (compressed block offset in the upper bits, intra-block offset in
 * the lower bits).
 *
 * @return a virtual file pointer for the underlying compressed stream.
 * @see BlockCompressedInputStream#getFilePointer()
 */
public long getVirtualFilePointer() {
    return mCompressedInputStream.getFilePointer();
}
/** Closes the wrapped stream, releasing its underlying resources. */
@Override
public void close() throws IOException {
    stream.close();
}
}
/**
 * Seeks the input stream to the given offset, treating it as a BGZF virtual
 * file pointer when the input is block-compressed and as a plain byte offset
 * otherwise.
 *
 * @param virt target offset (virtual file pointer if {@code bgzf} is true).
 * @throws IOException on seek failure.
 */
private void cinSeek(long virt) throws IOException {
    if (bgzf) {
        ((BlockCompressedInputStream) cin).seek(virt);
        return;
    }
    ((SeekableStream) cin).seek(virt);
}
/**
 * Reads some number of bytes from the input stream and stores them into the buffer array b. The number of bytes
 * actually read is returned as an integer. This method blocks until input data is available, end of file is detected,
 * or an exception is thrown.
 *
 * read(buf) has the same effect as read(buf, 0, buf.length).
 *
 * @param buffer the buffer into which the data is read.
 * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
 * the stream has been reached.
 */
@Override
public int read(final byte[] buffer) throws IOException {
    // Delegates to the three-argument overload, per the InputStream contract.
    return read(buffer, 0, buffer.length);
}
/**
 * Loads the compressed bytes in [beg, end) from the underlying file into an
 * in-memory seekable stream, then rebuilds the BGZF reader (with CRC checks
 * on) and the position guesser on top of it.
 */
public void fillBuffer(long beg, long end) throws IOException {
    this.inFile.seek(beg);
    int totalRead = 0;
    // Read until the requested span (capped at the current buffer capacity)
    // is filled or EOF is hit; read() may return short counts.
    for (int left = Math.min((int)(end - beg), arr.length); left > 0;) {
        final int r = inFile.read(arr, totalRead, left);
        if (r < 0) break; // EOF before the requested span was fully read
        totalRead += r;
        left -= r;
    }
    // NOTE(review): this reassignment permanently shrinks `arr` to totalRead,
    // so any later fillBuffer call is capped at this smaller capacity.
    // Confirm the buffer is never expected to grow again, or keep the trimmed
    // copy in a separate local/field.
    arr = Arrays.copyOf(arr, totalRead);
    in = new ByteArraySeekableStream(arr);
    bgzf = new BlockCompressedInputStream(in);
    bgzf.setCheckCrcs(true);
    posGuesser = new BAMPosGuesser(in, bgzf, referenceSequenceCount);
}