/**
 * Reads {@code inputFile} through a {@link CompressedSource} wrapping a {@code ByteSource} and
 * asserts that the records read match {@code expected}.
 *
 * <p>Each record is compared together with its timestamp: the i-th record must carry the value
 * {@code expected[i]} and a timestamp of {@code i} milliseconds.
 *
 * @param expected the bytes the reader is expected to produce, in order
 * @param inputFile the file to read
 * @param decompressionFactory if non-null, applied to the source via {@code withDecompression};
 *     if null, the source is used exactly as returned by {@code CompressedSource.from}
 * @throws IOException if creating, advancing, or closing the reader fails
 */
private void verifyReadContents(
    byte[] expected, File inputFile, @Nullable DecompressingChannelFactory decompressionFactory)
    throws IOException {
  // Build the expectation first: every byte paired with its index as the timestamp millis.
  List<KV<Long, Byte>> wanted = Lists.newArrayList();
  long position = 0;
  for (byte value : expected) {
    wanted.add(KV.of(position, value));
    position++;
  }

  CompressedSource<Byte> plainSource =
      CompressedSource.from(new ByteSource(inputFile.toPath().toString(), 1));
  CompressedSource<Byte> source =
      decompressionFactory == null
          ? plainSource
          : plainSource.withDecompression(decompressionFactory);

  // Drain the reader, recording (timestamp millis, value) for every record.
  List<KV<Long, Byte>> observed = Lists.newArrayList();
  try (BoundedReader<Byte> reader = source.createReader(PipelineOptionsFactory.create())) {
    boolean hasRecord = reader.start();
    while (hasRecord) {
      observed.add(KV.of(reader.getCurrentTimestamp().getMillis(), reader.getCurrent()));
      hasRecord = reader.advance();
    }
  }

  assertEquals(wanted, observed);
}
@Test public void testUnsplittable() throws IOException { String baseName = "test-input"; File compressedFile = tmpFolder.newFile(baseName + ".gz"); byte[] input = generateInput(10000); writeFile(compressedFile, input, CompressionMode.GZIP); CompressedSource<Byte> source = CompressedSource.from(new ByteSource(compressedFile.getPath(), 1)); List<Byte> expected = Lists.newArrayList(); for (byte i : input) { expected.add(i); } PipelineOptions options = PipelineOptionsFactory.create(); BoundedReader<Byte> reader = source.createReader(options); List<Byte> actual = Lists.newArrayList(); for (boolean hasNext = reader.start(); hasNext; hasNext = reader.advance()) { actual.add(reader.getCurrent()); // checkpoint every 9 elements if (actual.size() % 9 == 0) { Double fractionConsumed = reader.getFractionConsumed(); assertNotNull(fractionConsumed); assertNull(reader.splitAtFraction(fractionConsumed)); } } assertEquals(expected.size(), actual.size()); assertEquals(Sets.newHashSet(expected), Sets.newHashSet(actual)); }
@Test public void testEmptyGzipProgress() throws IOException { File tmpFile = tmpFolder.newFile("empty.gz"); String filename = tmpFile.toPath().toString(); writeFile(tmpFile, new byte[0], CompressionMode.GZIP); PipelineOptions options = PipelineOptionsFactory.create(); CompressedSource<Byte> source = CompressedSource.from(new ByteSource(filename, 1)); try (BoundedReader<Byte> readerOrig = source.createReader(options)) { assertThat(readerOrig, instanceOf(CompressedReader.class)); CompressedReader<Byte> reader = (CompressedReader<Byte>) readerOrig; // before starting assertEquals(0.0, reader.getFractionConsumed(), 1e-6); assertEquals(0, reader.getSplitPointsConsumed()); assertEquals(1, reader.getSplitPointsRemaining()); // confirm empty assertFalse(reader.start()); // after reading empty source assertEquals(1.0, reader.getFractionConsumed(), 1e-6); assertEquals(0, reader.getSplitPointsConsumed()); assertEquals(0, reader.getSplitPointsRemaining()); } }
try (BoundedReader<Byte> readerOrig = source.createReader(options)) { assertThat(readerOrig, not(instanceOf(CompressedReader.class))); assertThat(readerOrig, instanceOf(FileBasedReader.class));
try (BoundedReader<Byte> readerOrig = source.createReader(options)) { assertThat(readerOrig, instanceOf(CompressedReader.class)); CompressedReader<Byte> reader = (CompressedReader<Byte>) readerOrig;