/**
 * Fills the supplied array with bytes read from this stream and reports how many were stored.
 * This method blocks until input data is available, end of file is detected, or an exception
 * is thrown.
 *
 * Calling {@code read(buffer)} has the same effect as {@code read(buffer, 0, buffer.length)}.
 *
 * @param buffer the array into which the data is read.
 * @return the total number of bytes read into {@code buffer}, or -1 if there is no more data
 *         because the end of the stream has been reached.
 * @throws IOException propagated from {@code read(byte[], int, int)}.
 */
@Override
public int read(final byte[] buffer) throws IOException {
    final int capacity = buffer.length;
    return read(buffer, 0, capacity);
}
/**
 * Reads bytes from the input stream into the whole of the given array and returns the number
 * of bytes actually stored. This method blocks until input data is available, end of file is
 * detected, or an exception is thrown.
 *
 * {@code read(buffer)} is shorthand for {@code read(buffer, 0, buffer.length)}.
 *
 * @param buffer the array into which the data is read.
 * @return the total number of bytes read into {@code buffer}, or -1 if there is no more data
 *         because the end of the stream has been reached.
 * @throws IOException propagated from {@code read(byte[], int, int)}.
 */
public int read(final byte[] buffer) throws IOException {
    final int wholeLength = buffer.length;
    return read(buffer, 0, wholeLength);
}
/**
 * Fills the supplied array with bytes read from this stream and reports how many were stored.
 * This method blocks until input data is available, end of file is detected, or an exception
 * is thrown.
 *
 * Calling {@code read(buffer)} has the same effect as {@code read(buffer, 0, buffer.length)}.
 *
 * @param buffer the array into which the data is read.
 * @return the total number of bytes read into {@code buffer}, or -1 if there is no more data
 *         because the end of the stream has been reached.
 * @throws IOException propagated from {@code read(byte[], int, int)}.
 */
@Override
public int read(final byte[] buffer) throws IOException {
    final int capacity = buffer.length;
    return read(buffer, 0, capacity);
}
/**
 * Reads bytes from the input stream into the whole of the given array and returns the number
 * of bytes actually stored. This method blocks until input data is available, end of file is
 * detected, or an exception is thrown.
 *
 * {@code read(buffer)} is shorthand for {@code read(buffer, 0, buffer.length)}.
 *
 * @param buffer the array into which the data is read.
 * @return the total number of bytes read into {@code buffer}, or -1 if there is no more data
 *         because the end of the stream has been reached.
 * @throws IOException propagated from {@code read(byte[], int, int)}.
 */
@Override
public int read(final byte[] buffer) throws IOException {
    final int wholeLength = buffer.length;
    return read(buffer, 0, wholeLength);
}
/**
 * Reads one line from the data file.
 *
 * Values are taken from {@code fileInputStream.read()} one at a time until a {@code '\n'} is
 * seen or the stream reports end of data. Each value is cast directly to {@code char}; no
 * charset decoding is applied and a {@code '\r'} preceding the newline is not removed.
 *
 * @return the line without its terminating {@code '\n'}. A final line that is not terminated
 *         by a newline is returned as-is rather than being dropped. {@code null} is returned
 *         only when the stream was already exhausted, i.e. nothing could be read at all.
 * @throws IOException if reading from the underlying stream fails.
 */
public String readLine() throws IOException {
    // StringBuilder: the accumulator never leaves this method, so synchronization is not needed.
    final StringBuilder line = new StringBuilder();
    int c;
    while ((c = fileInputStream.read()) >= 0 && c != '\n') {
        line.append((char) c);
    }
    // End of data with nothing accumulated means there is no further line. If characters were
    // read before the stream ended, they form the last (unterminated) line and must be returned.
    if (c < 0 && line.length() == 0) {
        return null;
    }
    return line.toString();
}
/**
 * Loads the next newline-terminated line from {@code is} into {@code lineBuf}.
 *
 * {@code lineCounter} is incremented on every call, including the call that finds the stream
 * already exhausted. On success {@code lineBuf} has been cleared, filled with the line's bytes
 * (the terminating {@code '\n'} included) and flipped so it is ready to be read.
 *
 * @return {@code true} if a line was loaded, {@code false} if the stream was already at its end
 *         when this method was called.
 * @throws EOFException if the stream ends after at least one byte but before a {@code '\n'}
 *         is found.
 * @throws IOException if reading from {@code is} fails.
 */
private boolean readLine() throws IOException {
    lineCounter++;

    final int first = is.read();
    if (first == -1) {
        return false;
    }

    lineBuf.clear();
    lineBuf.put((byte) (0xFF & first));

    // Note: the first byte is stored without being tested for '\n'; only subsequent bytes
    // can terminate the line.
    for (;;) {
        final int next = is.read();
        if (next == -1) {
            // Stream ended in the middle of a line.
            throw new EOFException();
        }
        lineBuf.put((byte) (0xFF & next));
        if (!lineBuf.hasRemaining()) {
            // Buffer is full; presumably reallocate() makes room for more bytes.
            reallocate();
        }
        if (next == '\n') {
            break;
        }
    }

    lineBuf.flip();
    return true;
}
/**
 * Reports whether {@code file} can be read as a BAM file: it must pass
 * {@code BlockCompressedInputStream.isValidFile} and its first decompressed bytes must equal
 * the magic sequence {@code "BAM\1"}.
 *
 * The file stream is opened in a try-with-resources block so it is closed on every path,
 * including the early "not block-compressed" return and any exception.
 *
 * @param file the file to probe.
 * @return {@code true} if the magic bytes match; {@code false} if they do not, if the file is
 *         not a valid block-compressed file, or if an {@code IOException} or
 *         {@code FileTruncatedException} occurs while probing.
 */
@Override
public boolean canRead(File file) {
    final byte[] bamMagic = "BAM\1".getBytes();
    final byte[] buffer = new byte[bamMagic.length];
    try (InputStream fstream = new BufferedInputStream(new FileInputStream(file))) {
        if (!BlockCompressedInputStream.isValidFile(fstream)) {
            return false;
        }
        try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(fstream)) {
            // A short read leaves the untouched tail of buffer zeroed, so the comparison
            // below simply fails in that case.
            bcis.read(buffer, 0, bamMagic.length);
        }
        return Arrays.equals(buffer, bamMagic);
    } catch (IOException e) {
        // Unreadable file: treat as "cannot read" rather than propagating.
        return false;
    } catch (htsjdk.samtools.FileTruncatedException e) {
        // Truncated block-compressed file: likewise not readable.
        return false;
    }
}
}
/**
 * Opens {@code blockFile} as a block-compressed stream with CRC checking enabled and reads
 * into a 65536-byte buffer, then prints {@code SUCCESS!} to standard output.
 *
 * The stream is managed by try-with-resources so it is closed even when enabling the CRC
 * check or the read throws.
 *
 * @return always 0 when no exception is thrown.
 * @throws IOException if opening, reading or closing the stream fails.
 */
public int execute() throws IOException {
    final byte[] uncompressedBuffer = new byte[65536];
    try (BlockCompressedInputStream gunzipper = new BlockCompressedInputStream(blockFile)) {
        gunzipper.setCheckCrcs(true);
        // The byte count is not needed; the read is performed only for its checks.
        gunzipper.read(uncompressedBuffer);
    }
    System.out.printf("SUCCESS!%n");
    return 0;
}
@Override protected int readFromPosition(final ByteBuffer buffer, final long position) throws IOException { // old position to get back final long oldPos = stream.getFilePointer(); try { final long virtualOffset = gzindex.getVirtualOffsetForSeek(position); stream.seek(virtualOffset); final byte[] array = new byte[buffer.remaining()]; final int read = stream.read(array); buffer.put(array); return read; } finally { stream.seek(oldPos); } }
/**
 * Reads the compressed fixture in a single {@code read(byte[])} call sized to the whole
 * uncompressed fixture and checks that the bytes match, that the stream then reports end of
 * block, and that a further single-byte read returns -1.
 */
@Test
public void decompression_should_cross_block_boundries() throws Exception {
    final byte[] expected = Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath());
    try (BlockCompressedInputStream in = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) {
        final byte[] actual = new byte[expected.length];
        in.read(actual);
        Assert.assertEquals(actual, expected);
        Assert.assertTrue(in.endOfBlock());
        // Nothing may be left once the full uncompressed length has been consumed.
        final int afterEnd = in.read();
        Assert.assertEquals(afterEnd, -1);
    }
}
@Test
@Override protected int readFromPosition(final ByteBuffer buffer, final long position) throws IOException { // old position to get back final long oldPos = stream.getFilePointer(); try { final long virtualOffset = gzindex.getVirtualOffsetForSeek(position); stream.seek(virtualOffset); final byte[] array = new byte[buffer.remaining()]; final int read = stream.read(array); buffer.put(array); return read; } finally { stream.seek(oldPos); } }
/**
 * Reads the compressed fixture one byte at a time and checks every value against the
 * corresponding (unsigned) byte of the uncompressed fixture, then checks that the stream
 * reports end of block once all bytes have been consumed.
 */
@Test
public void stream_should_match_uncompressed_stream() throws Exception {
    final byte[] expectedBytes = Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath());
    try (BlockCompressedInputStream in = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) {
        for (final byte expected : expectedBytes) {
            // read() yields 0..255, so compare against the unsigned value of the byte.
            Assert.assertEquals(in.read(), Byte.toUnsignedInt(expected));
        }
        Assert.assertTrue(in.endOfBlock());
    }
}
@Test
/** * Read up to <code>len</code> bytes from the stream, but no further than the end of the * compressed block. If at the end of the block then no bytes will be read and a return * value of -2 will be returned; on the next call to read, bytes from the next block * will be returned. This is the same contract as CBZip2InputStream in Hadoop. * @return int The return value greater than 0 are the bytes read. A value * of -1 means end of stream while -2 represents end of block. */ private int readWithinBlock(byte[] b, int off, int len) throws IOException { if (input.endOfBlock()) { final int available = input.available(); // this will read the next block, if there is one processedPosition = input.getPosition() >> 16; if (available == 0) { // end of stream return -1; } return END_OF_BLOCK; } // return up to end of block (at most) int available = input.available(); return input.read(b, off, Math.min(available, len)); }
/**
 * Walks through the first block one byte at a time and checks before each read that
 * {@code available()} equals the first block's end position minus the bytes consumed so far.
 */
@Test
public void available_should_return_number_of_bytes_left_in_current_block() throws Exception {
    try (BlockCompressedInputStream in = new BlockCompressedInputStream(BLOCK_COMPRESSED)) {
        int consumed = 0;
        while (consumed < BLOCK_UNCOMPRESSED_END_POSITIONS[0]) {
            Assert.assertEquals(in.available(), BLOCK_UNCOMPRESSED_END_POSITIONS[0] - consumed);
            in.read();
            consumed++;
        }
    }
}
/** * Read up to <code>len</code> bytes from the stream, but no further than the end of the * compressed block. If at the end of the block then no bytes will be read and a return * value of -2 will be returned; on the next call to read, bytes from the next block * will be returned. This is the same contract as CBZip2InputStream in Hadoop. * @return int The return value greater than 0 are the bytes read. A value * of -1 means end of stream while -2 represents end of block. */ private int readWithinBlock(byte[] b, int off, int len) throws IOException { if (input.endOfBlock()) { final int available = input.available(); // this will read the next block, if there is one processedPosition = input.getPosition() >> 16; if (available == 0) { // end of stream return -1; } return END_OF_BLOCK; } // return up to end of block (at most) int available = input.available(); return input.read(b, off, Math.min(available, len)); }
/** * Read up to <code>len</code> bytes from the stream, but no further than the end of the * compressed block. If at the end of the block then no bytes will be read and a return * value of -2 will be returned; on the next call to read, bytes from the next block * will be returned. This is the same contract as CBZip2InputStream in Hadoop. * @return int The return value greater than 0 are the bytes read. A value * of -1 means end of stream while -2 represents end of block. */ private int readWithinBlock(byte[] b, int off, int len) throws IOException { if (input.endOfBlock()) { final int available = input.available(); // this will read the next block, if there is one processedPosition = input.getPosition() >> 16; if (available == 0) { // end of stream return -1; } return END_OF_BLOCK; } // return up to end of block (at most) int available = input.available(); return input.read(b, off, Math.min(available, len)); }
/**
 * Reads the compressed fixture byte by byte, recording every byte index at which
 * {@code endOfBlock()} is true before the read, and checks that the recorded indices are
 * exactly the values in {@code BLOCK_UNCOMPRESSED_END_POSITIONS}.
 */
@Test
public void endOfBlock_should_be_true_only_when_entire_block_is_read() throws Exception {
    final long totalBytes = BLOCK_UNCOMPRESSED.length();

    // input file contains 5 blocks
    final List<Long> expectedOffsets = new ArrayList<>();
    for (final long blockEnd : BLOCK_UNCOMPRESSED_END_POSITIONS) {
        expectedOffsets.add(blockEnd);
    }

    final List<Long> observedOffsets = new ArrayList<>();
    try (BlockCompressedInputStream in = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) {
        long position = 0;
        while (position < totalBytes) {
            if (in.endOfBlock()) {
                observedOffsets.add(position);
            }
            in.read();
            position++;
        }
    }

    Assert.assertEquals(observedOffsets, expectedOffsets);
}
@Test
/**
 * For every entry of {@code BLOCK_COMPRESSED_OFFSETS} except the last, seeks to that offset
 * shifted left by 16 bits, checks that the underlying file stream is then positioned at the
 * next entry, and checks that the bytes read from there equal the tail of the uncompressed
 * fixture of the same length.
 */
@Test
public void seek_should_read_block() throws Exception {
    final byte[] uncompressed = Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath());
    try (SeekableFileStream sfs = new SeekableFileStream(BLOCK_COMPRESSED);
         BlockCompressedInputStream stream = new BlockCompressedInputStream(sfs)) {
        final int lastIndex = BLOCK_COMPRESSED_OFFSETS.length - 1;
        for (int block = 0; block < lastIndex; block++) {
            // seek to the start of this block
            stream.seek(BLOCK_COMPRESSED_OFFSETS[block] << 16);
            Assert.assertEquals(sfs.position(), BLOCK_COMPRESSED_OFFSETS[block + 1]);

            // check: whatever was read must equal the last `len` bytes of the fixture
            final byte[] readBuffer = new byte[uncompressed.length];
            final int len = stream.read(readBuffer);
            final byte[] actual = Arrays.copyOf(readBuffer, len);
            final int tailStart = uncompressed.length - actual.length;
            final byte[] expected = Arrays.copyOfRange(uncompressed, tailStart, uncompressed.length);
            Assert.assertEquals(actual, expected);
        }
    }
}
@Test
/**
 * Writes 64 KiB of pseudo-random bytes (fixed seed 15555) through a
 * {@code BlockCompressedOutputStream} into a temporary file, reads the file back through a
 * {@code BlockCompressedInputStream}, and checks that every byte and the total count match.
 *
 * Both streams are managed by try-with-resources so they are closed even when a write, a read
 * or an assertion fails.
 */
@Test
public void testOverflow() throws Exception {
    final File f = File.createTempFile("BCOST.", ".gz");
    f.deleteOnExit();
    System.out.println("Creating file " + f);

    final Random r = new Random(15555);
    final int INPUT_SIZE = 64 * 1024;
    final byte[] input = new byte[INPUT_SIZE];
    r.nextBytes(input);

    // The output stream must be closed before the file is read back.
    try (BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(f)) {
        bcos.write(input);
    }

    try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(f)) {
        final byte[] output = new byte[INPUT_SIZE];
        int len;
        int i = 0; // running index into input across successive reads
        while ((len = bcis.read(output, 0, output.length)) != -1) {
            for (int j = 0; j < len; j++) {
                Assert.assertEquals(output[j], input[i++]);
            }
        }
        Assert.assertEquals(i, INPUT_SIZE);
    }
}
/**
 * Checks that data can be read starting at the given block start: opens {@code file} as a
 * block-compressed stream with CRC checking enabled, seeks to {@code blockStart << 16} and
 * reads into a 100-byte buffer. Any failure surfaces as an exception.
 *
 * The stream is opened in try-with-resources so it is closed on every path.
 * NOTE(review): this assumes {@code file} is a {@code java.io.File} (or otherwise not shared
 * with other readers), so closing the stream here affects nothing else; confirm against the
 * field declaration.
 *
 * @param blockStart the block start position; shifted left by 16 bits before seeking.
 * @throws IOException if opening, seeking or reading fails.
 */
private void canReadFromBlockStart(long blockStart) throws IOException {
    try (BlockCompressedInputStream blockCompressedInputStream = new BlockCompressedInputStream(file)) {
        blockCompressedInputStream.setCheckCrcs(true);
        blockCompressedInputStream.seek(blockStart << 16);
        final byte[] b = new byte[100];
        // The bytes themselves are not inspected; the read only has to complete.
        blockCompressedInputStream.read(b);
    }
}
}