/**
 * Verifies that a single bulk {@code read(byte[])} call on a
 * {@link BlockCompressedInputStream} returns the complete uncompressed payload
 * (the test name implies the fixture spans more than one compressed block).
 *
 * <p>After the bulk read the stream must report end-of-block, and a further
 * single-byte read must return -1.
 *
 * @throws Exception if either fixture file cannot be read
 */
@Test
public void decompression_should_cross_block_boundries() throws Exception {
    final byte[] uncompressed = Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath());
    try (BlockCompressedInputStream stream = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) {
        final byte[] decompressed = new byte[uncompressed.length];
        // The InputStream contract permits short reads, so pin the byte count explicitly:
        // a read that stopped early then fails here instead of as an opaque array mismatch.
        final int bytesRead = stream.read(decompressed);
        Assert.assertEquals(bytesRead, uncompressed.length);
        Assert.assertEquals(decompressed, uncompressed);
        Assert.assertTrue(stream.endOfBlock());
        Assert.assertEquals(stream.read(), -1);
    }
}
@Test
/**
 * Reads the compressed fixture one byte at a time and checks every byte against
 * the uncompressed reference file, then checks that the stream reports
 * end-of-block once the last reference byte has been consumed.
 *
 * @throws Exception if either fixture file cannot be read
 */
@Test
public void stream_should_match_uncompressed_stream() throws Exception {
    final byte[] expectedBytes = Files.readAllBytes(BLOCK_UNCOMPRESSED.toPath());
    try (BlockCompressedInputStream stream = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) {
        for (final byte expected : expectedBytes) {
            // read() yields 0..255, so compare against the unsigned value of the reference byte.
            final int actual = stream.read();
            Assert.assertEquals(actual, Byte.toUnsignedInt(expected));
        }
        Assert.assertTrue(stream.endOfBlock());
    }
}
@Test
/** * Read up to <code>len</code> bytes from the stream, but no further than the end of the * compressed block. If at the end of the block then no bytes will be read and a return * value of -2 will be returned; on the next call to read, bytes from the next block * will be returned. This is the same contract as CBZip2InputStream in Hadoop. * @return int The return value greater than 0 are the bytes read. A value * of -1 means end of stream while -2 represents end of block. */ private int readWithinBlock(byte[] b, int off, int len) throws IOException { if (input.endOfBlock()) { final int available = input.available(); // this will read the next block, if there is one processedPosition = input.getPosition() >> 16; if (available == 0) { // end of stream return -1; } return END_OF_BLOCK; } // return up to end of block (at most) int available = input.available(); return input.read(b, off, Math.min(available, len)); }
/**
 * Walks the compressed fixture byte by byte and records every position at which
 * {@code endOfBlock()} is true before the byte at that position is read. The
 * recorded positions must equal {@code BLOCK_UNCOMPRESSED_END_POSITIONS} exactly.
 *
 * @throws Exception if the fixture files cannot be read
 */
@Test
public void endOfBlock_should_be_true_only_when_entire_block_is_read() throws Exception {
    final long totalBytes = BLOCK_UNCOMPRESSED.length();

    // input file contains 5 blocks
    final List<Long> expectedPositions = new ArrayList<>();
    for (final long endPosition : BLOCK_UNCOMPRESSED_END_POSITIONS) {
        expectedPositions.add(endPosition);
    }

    final List<Long> observedPositions = new ArrayList<>();
    try (BlockCompressedInputStream stream = new BlockCompressedInputStream(new FileInputStream(BLOCK_COMPRESSED))) {
        for (long position = 0; position < totalBytes; position++) {
            // Check the flag first, then consume the byte at this position.
            if (stream.endOfBlock()) {
                observedPositions.add(position);
            }
            stream.read();
        }
    }

    Assert.assertEquals(observedPositions, expectedPositions);
}
@Test
/** * Read up to <code>len</code> bytes from the stream, but no further than the end of the * compressed block. If at the end of the block then no bytes will be read and a return * value of -2 will be returned; on the next call to read, bytes from the next block * will be returned. This is the same contract as CBZip2InputStream in Hadoop. * @return int The return value greater than 0 are the bytes read. A value * of -1 means end of stream while -2 represents end of block. */ private int readWithinBlock(byte[] b, int off, int len) throws IOException { if (input.endOfBlock()) { final int available = input.available(); // this will read the next block, if there is one processedPosition = input.getPosition() >> 16; if (available == 0) { // end of stream return -1; } return END_OF_BLOCK; } // return up to end of block (at most) int available = input.available(); return input.read(b, off, Math.min(available, len)); }
/** * Read up to <code>len</code> bytes from the stream, but no further than the end of the * compressed block. If at the end of the block then no bytes will be read and a return * value of -2 will be returned; on the next call to read, bytes from the next block * will be returned. This is the same contract as CBZip2InputStream in Hadoop. * @return int The return value greater than 0 are the bytes read. A value * of -1 means end of stream while -2 represents end of block. */ private int readWithinBlock(byte[] b, int off, int len) throws IOException { if (input.endOfBlock()) { final int available = input.available(); // this will read the next block, if there is one processedPosition = input.getPosition() >> 16; if (available == 0) { // end of stream return -1; } return END_OF_BLOCK; } // return up to end of block (at most) int available = input.available(); return input.read(b, off, Math.min(available, len)); }
currentOffset++; if (bgzipStream.endOfBlock()) {
currentOffset++; if (bgzipStream.endOfBlock()) {
// Opens a second BlockCompressedInputStream over f and exercises available()/endOfBlock():
//   1. before any read, the stream must not be at end of block and available() must be > 0;
//   2. reading exactly available() bytes must return that count and leave the stream at end of block;
//   3. after close(), read(buffer) is asserted to return -1.
// NOTE(review): step 3 reads from an already-closed stream; confirm that returning -1 (rather than
// throwing) after close() is the intended contract being tested here.
final BlockCompressedInputStream bcis2 = new BlockCompressedInputStream(f); int available = bcis2.available(); Assert.assertFalse(bcis2.endOfBlock(), "Should not be at end of block"); Assert.assertTrue(available > 0); byte[] buffer = new byte[available]; Assert.assertEquals(bcis2.read(buffer), available, "Should read to end of block"); Assert.assertTrue(bcis2.endOfBlock(), "Should be at end of block"); bcis2.close(); Assert.assertEquals(bcis2.read(buffer), -1, "Should be end of file");