/** * Resets the given {@link TikaInputStream} (checked by * {@link #ensureStreamReReadable(InputStream, TemporaryResources)}) * so that it can be re-read again. */ public static InputStream streamResetForReRead(InputStream stream, TemporaryResources tmp) throws IOException { // If re-readable, rewind to start if (stream instanceof RereadableInputStream) { ((RereadableInputStream)stream).rewind(); return stream; } // File or Factory based? TikaInputStream tstream = (TikaInputStream)stream; if (tstream.getInputStreamFactory() != null) { // Just get a fresh one each time from the factory return TikaInputStream.get(tstream.getInputStreamFactory(), tmp); } // File based, reset stream to beginning of File tstream.reset(); tstream.mark(-1); return tstream; } }
/** * Ensures that the Stream will be able to be re-read, by buffering to * a temporary file if required. * Streams that are automatically OK include {@link TikaInputStream}s * created from Files or InputStreamFactories, and {@link RereadableInputStream}. */ public static InputStream ensureStreamReReadable(InputStream stream, TemporaryResources tmp) throws IOException { // If it's re-readable, we're done if (stream instanceof RereadableInputStream) return stream; // Make sure it's a TikaInputStream TikaInputStream tstream = TikaInputStream.cast(stream); if (tstream == null) { tstream = TikaInputStream.get(stream, tmp); } // If it's factory based, it's ok if (tstream.getInputStreamFactory() != null) return tstream; // Ensure it's file based tstream.getFile(); // Prepare for future re-reads tstream.mark(-1); return tstream; } /**
/**
 * Checks a {@link TikaInputStream} created from a plain {@link InputStream}:
 * it starts without a backing file, gets one once its path is requested,
 * still delivers the original content afterwards, and removes that
 * temporary file when it is closed.
 *
 * @throws IOException if reading the stream or the file fails
 */
@Test
public void testStreamBased() throws IOException {
    InputStream input = IOUtils.toInputStream("Hello, World!", UTF_8.name());
    Path file;

    // try-with-resources closes the stream even when an assertion below
    // fails, so a failing run does not leave the stream (and the temporary
    // file it created) behind
    try (TikaInputStream stream = TikaInputStream.get(input)) {
        // Freshly wrapped: no file, no container, no factory
        assertFalse(stream.hasFile());
        assertNull(stream.getOpenContainer());
        assertNull(stream.getInputStreamFactory());

        // Asking for the path makes the stream file backed
        file = TikaInputStream.get(stream).getPath();
        assertTrue(file != null && Files.isRegularFile(file));
        assertTrue(stream.hasFile());
        assertNull(stream.getOpenContainer());
        assertNull(stream.getInputStreamFactory());

        assertEquals(
                "The contents of the file returned by the getFile method"
                        + " should equal the contents of the TikaInputStream",
                "Hello, World!", readFile(file));

        assertEquals(
                "The contents of the TikaInputStream should not get modified"
                        + " by reading the file first",
                "Hello, World!", readStream(stream));
    }

    // The stream has been closed at this point
    assertFalse(
            "The close() method must remove the temporary file created"
                    + " by a TikaInputStream",
            Files.exists(file));
}
/**
 * Checks a {@link TikaInputStream} created from an existing file: it reports
 * that file as its path, delivers the file's content, and leaves the file
 * in place when it is closed.
 *
 * @throws IOException if creating, reading or deleting the file fails
 */
@Test
public void testFileBased() throws IOException {
    Path path = createTempFile("Hello, World!");
    try {
        // try-with-resources closes the stream even when an assertion fails
        try (TikaInputStream stream = TikaInputStream.get(path)) {
            assertTrue(stream.hasFile());
            assertNull(stream.getOpenContainer());
            assertNull(stream.getInputStreamFactory());

            assertEquals(
                    "The file returned by the getFile() method should"
                            + " be the file used to instantiate a TikaInputStream",
                    path, TikaInputStream.get(stream).getPath());

            assertEquals(
                    "The contents of the TikaInputStream should equal the"
                            + " contents of the underlying file",
                    "Hello, World!", readStream(stream));
        }

        // The stream has been closed at this point
        assertTrue(
                "The close() method must not remove the file used to"
                        + " instantiate a TikaInputStream",
                Files.exists(path));
    } finally {
        // Always clean up the file created for this test, also after a
        // failed assertion; tolerate the file already being gone so the
        // original failure is the one that gets reported
        Files.deleteIfExists(path);
    }
}
/**
 * Checks a {@link TikaInputStream} created from an {@link InputStreamFactory}:
 * it has no backing file and no open container, exposes its factory, and
 * delivers the content produced by that factory.
 *
 * @throws IOException if reading the stream fails
 */
@Test
public void testInputStreamFactoryBased() throws IOException {
    InputStreamFactory factory = new InputStreamFactory() {
        @Override
        public InputStream getInputStream() throws IOException {
            return IOUtils.toInputStream("Hello, World!", UTF_8.name());
        }
    };

    // try-with-resources closes the stream even when an assertion fails
    try (TikaInputStream stream = TikaInputStream.get(factory)) {
        assertFalse(stream.hasFile());
        assertNull(stream.getOpenContainer());
        assertNotNull(stream.getInputStreamFactory());

        // No file is involved in this test, so the message talks about the
        // factory rather than about "reading the file first"
        assertEquals(
                "The contents of the TikaInputStream should equal the"
                        + " contents supplied by the InputStreamFactory",
                "Hello, World!", readStream(stream));
    }
}