} finally { if (tikaIS != null) { tikaIS.close();
autoDetectParser.parse(tikaInputStream, new DefaultHandler(), metadata); } finally { tikaInputStream.close();
/**
 * Parses an in-memory AWT image by rendering it into a temporary PNG file and handing that
 * file to the stream-accepting {@code parse} overload.
 *
 * @param image the image to parse; its width and height are read with a {@code null} observer
 * @param handler content handler forwarded to the delegate {@code parse} call
 * @param metadata metadata forwarded to the delegate {@code parse} call
 * @param context parse context forwarded to the delegate {@code parse} call
 * @throws IOException if writing or reading the temporary file fails
 * @throws SAXException if the delegate {@code parse} call reports one
 * @throws TikaException if the delegate {@code parse} call reports one
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);

        // A freshly constructed BufferedImage is blank: copy the source pixels into it,
        // otherwise an empty picture would be written and parsed.
        java.awt.Graphics2D graphics = bImage.createGraphics();
        try {
            graphics.drawImage(image, 0, 0, null);
        } finally {
            graphics.dispose();
        }

        File file = tmp.createTemporaryFile();
        // Close the writer before the file is read back.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }
        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Dispose last, once no stream still has the temporary file open.
        tmp.dispose();
    }
}
/**
 * Checks that obtaining a TikaInputStream from a URL records the resource name and the
 * content length in the supplied metadata.
 */
@Test
public void testGetMetadata() throws Exception {
    final URL resource = TikaInputStreamTest.class.getResource("test.txt");
    final Metadata collected = new Metadata();

    // Only the metadata side effect of get() matters here, so close immediately.
    final TikaInputStream opened = TikaInputStream.get(resource, collected);
    opened.close();

    assertEquals("test.txt", collected.get(TikaCoreProperties.RESOURCE_NAME_KEY));

    final long sizeOnDisk = Files.size(Paths.get(resource.toURI()));
    assertEquals(Long.toString(sizeOnDisk), collected.get(Metadata.CONTENT_LENGTH));
}
/**
 * Hands an embedded object's raw bytes to the embedded-document parser.
 *
 * <p>Does nothing when {@code bytes} is {@code null}. Otherwise records the byte count as
 * the content length and, if the embedded document should be parsed, assigns a generated
 * resource name when none is set and parses the bytes.
 *
 * @param bytes raw bytes of the embedded object, may be {@code null}
 * @param handler handler that receives the embedded content, wrapped in an
 *     {@link EmbeddedContentHandler}
 * @param metadata metadata of the embedded object; updated in place
 * @throws SAXException if the embedded parse reports one
 * @throws IOException if closing the stream fails
 * @throws TikaException if the embedded parse reports one
 */
private void extractObj(byte[] bytes, ContentHandler handler, Metadata metadata)
        throws SAXException, IOException, TikaException {
    if (bytes == null) {
        return;
    }
    metadata.set(Metadata.CONTENT_LENGTH, Integer.toString(bytes.length));
    if (!embeddedDocumentUtil.shouldParseEmbedded(metadata)) {
        return;
    }

    // The stream is opened inside try-with-resources so that it is also closed when
    // naming the resource (getExtension) fails, not only when parsing does.
    try (TikaInputStream stream = TikaInputStream.get(bytes)) {
        if (metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY) == null) {
            String extension = embeddedDocumentUtil.getExtension(stream, metadata);
            if (inObject && state == EMB_STATE.PICT) {
                metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY,
                        "thumbnail_" + thumbCount++ + extension);
                metadata.set(RTFMetadata.THUMBNAIL, "true");
            } else {
                metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY,
                        "file_" + unknownFilenameCount.getAndIncrement() + extension);
            }
        }
        // Kept as a nested try so that only parse-time IOExceptions are recorded;
        // an IOException from close() still propagates to the caller.
        try {
            embeddedDocumentUtil.parseEmbedded(
                    stream, new EmbeddedContentHandler(handler), metadata, false);
        } catch (IOException e) {
            EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
        }
    }
}
} finally { tis.close();
/**
 * Builds metadata for an embedded resource from the non-null arguments and parses the
 * resource if the embedded-document utility accepts it. The resource is always closed.
 *
 * @param resource stream of the embedded resource; closed before this method returns
 * @param filename name of the embedded resource, or {@code null}
 * @param relationshipID relationship id of the embedded resource, or {@code null}
 * @param storageClassID storage class id of the embedded resource, or {@code null}
 * @param mediaType content type of the embedded resource, or {@code null}
 * @param xhtml handler passed to the embedded parse
 * @param outputHtml flag passed through to the embedded parse
 */
protected void handleEmbeddedResource(TikaInputStream resource, String filename,
        String relationshipID, ClassID storageClassID, String mediaType,
        XHTMLContentHandler xhtml, boolean outputHtml)
        throws IOException, SAXException, TikaException {
    try {
        final Metadata embeddedMetadata = new Metadata();

        if (filename != null) {
            embeddedMetadata.set(Metadata.TIKA_MIME_FILE, filename);
            embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
        }
        if (relationshipID != null) {
            embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, relationshipID);
        }
        if (storageClassID != null) {
            final String classIdText = storageClassID.toString();
            embeddedMetadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID, classIdText);
        }
        if (mediaType != null) {
            embeddedMetadata.set(Metadata.CONTENT_TYPE, mediaType);
        }

        final boolean wanted = embeddedDocumentUtil.shouldParseEmbedded(embeddedMetadata);
        if (wanted) {
            embeddedDocumentUtil.parseEmbedded(resource, xhtml, embeddedMetadata, outputHtml);
        }
    } finally {
        resource.close();
    }
}
/**
 * Exercises a TikaInputStream created from an existing file: it reports a file, exposes
 * the same path, yields the file's contents, and leaves the file in place on close.
 */
@Test
public void testFileBased() throws IOException {
    final Path source = createTempFile("Hello, World!");
    final TikaInputStream tis = TikaInputStream.get(source);

    assertTrue(tis.hasFile());
    assertNull(tis.getOpenContainer());
    assertNull(tis.getInputStreamFactory());

    final Path reported = TikaInputStream.get(tis).getPath();
    assertEquals(
            "The file returned by the getFile() method should"
                    + " be the file used to instantiate a TikaInputStream",
            source, reported);

    final String contents = readStream(tis);
    assertEquals(
            "The contents of the TikaInputStream should equal the"
                    + " contents of the underlying file",
            "Hello, World!", contents);

    tis.close();

    final boolean stillThere = Files.exists(source);
    assertTrue(
            "The close() method must not remove the file used to"
                    + " instantiate a TikaInputStream",
            stillThere);

    // The test owns the file, so it removes it itself.
    Files.delete(source);
}
/**
 * Exercises a TikaInputStream created from an InputStreamFactory: it has no file, keeps
 * the factory, and yields the factory's contents.
 */
@Test
public void testInputStreamFactoryBased() throws IOException {
    final InputStreamFactory helloFactory = new InputStreamFactory() {
        @Override
        public InputStream getInputStream() throws IOException {
            return IOUtils.toInputStream("Hello, World!", UTF_8.name());
        }
    };
    final TikaInputStream tis = TikaInputStream.get(helloFactory);

    assertFalse(tis.hasFile());
    assertNull(tis.getOpenContainer());
    assertNotNull(tis.getInputStreamFactory());

    final String contents = readStream(tis);
    assertEquals(
            "The contents of the TikaInputStream should not get modified"
                    + " by reading the file first",
            "Hello, World!", contents);

    tis.close();
}
/**
 * Exercises a TikaInputStream created from a plain InputStream: it starts without a file,
 * gains one once its path is requested, yields the same contents from both the file and
 * the stream, and removes the file on close.
 */
@Test
public void testStreamBased() throws IOException {
    final InputStream raw = IOUtils.toInputStream("Hello, World!", UTF_8.name());
    final TikaInputStream tis = TikaInputStream.get(raw);

    // Before the path is requested there is no backing file.
    assertFalse(tis.hasFile());
    assertNull(tis.getOpenContainer());
    assertNull(tis.getInputStreamFactory());

    final Path spooled = TikaInputStream.get(tis).getPath();
    assertTrue(spooled != null && Files.isRegularFile(spooled));

    // After the path has been requested the stream reports a file.
    assertTrue(tis.hasFile());
    assertNull(tis.getOpenContainer());
    assertNull(tis.getInputStreamFactory());

    final String fromFile = readFile(spooled);
    assertEquals(
            "The contents of the file returned by the getFile method"
                    + " should equal the contents of the TikaInputStream",
            "Hello, World!", fromFile);

    final String fromStream = readStream(tis);
    assertEquals(
            "The contents of the TikaInputStream should not get modified"
                    + " by reading the file first",
            "Hello, World!", fromStream);

    tis.close();

    final boolean leftBehind = Files.exists(spooled);
    assertFalse(
            "The close() method must remove the temporary file created"
                    + " by a TikaInputStream",
            leftBehind);
}
/**
 * Parses an in-memory AWT image by rendering it into a temporary PNG file and handing that
 * file to the stream-accepting {@code parse} overload.
 *
 * @param image the image to parse; its width and height are read with a {@code null} observer
 * @param handler content handler forwarded to the delegate {@code parse} call
 * @param metadata metadata forwarded to the delegate {@code parse} call
 * @param context parse context forwarded to the delegate {@code parse} call
 * @throws IOException if writing or reading the temporary file fails
 * @throws SAXException if the delegate {@code parse} call reports one
 * @throws TikaException if the delegate {@code parse} call reports one
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);

        // A freshly constructed BufferedImage is blank: copy the source pixels into it,
        // otherwise an empty picture would be written and parsed.
        java.awt.Graphics2D graphics = bImage.createGraphics();
        try {
            graphics.drawImage(image, 0, 0, null);
        } finally {
            graphics.dispose();
        }

        File file = tmp.createTemporaryFile();
        // Close the writer before the file is read back.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }
        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Dispose last, once no stream still has the temporary file open.
        tmp.dispose();
    }
}
/**
 * Parses an in-memory AWT image by rendering it into a temporary PNG file and handing that
 * file to the stream-accepting {@code parse} overload.
 *
 * @param image the image to parse; its width and height are read with a {@code null} observer
 * @param handler content handler forwarded to the delegate {@code parse} call
 * @param metadata metadata forwarded to the delegate {@code parse} call
 * @param context parse context forwarded to the delegate {@code parse} call
 * @throws IOException if writing or reading the temporary file fails
 * @throws SAXException if the delegate {@code parse} call reports one
 * @throws TikaException if the delegate {@code parse} call reports one
 */
public void parse(Image image, ContentHandler handler, Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    TemporaryResources tmp = new TemporaryResources();
    try {
        int w = image.getWidth(null);
        int h = image.getHeight(null);
        BufferedImage bImage = new BufferedImage(w, h, BufferedImage.TYPE_INT_RGB);

        // A freshly constructed BufferedImage is blank: copy the source pixels into it,
        // otherwise an empty picture would be written and parsed.
        java.awt.Graphics2D graphics = bImage.createGraphics();
        try {
            graphics.drawImage(image, 0, 0, null);
        } finally {
            graphics.dispose();
        }

        File file = tmp.createTemporaryFile();
        // Close the writer before the file is read back.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            ImageIO.write(bImage, "png", fos);
        }
        try (TikaInputStream tis = TikaInputStream.get(file)) {
            parse(tis, handler, metadata, context);
        }
    } finally {
        // Dispose last, once no stream still has the temporary file open.
        tmp.dispose();
    }
}
/**
 * Parses the resource at {@code uri} and returns all collected metadata as a JSON object
 * mapping each metadata name to its array of values.
 *
 * @param uri location of the resource to parse
 * @param contentType content-type hint passed to {@code fillMetadata}
 * @return JSON text produced by Gson from the name-to-values map
 * @throws Exception if opening, parsing or closing the resource fails
 */
public static String extractMeta(String uri, String contentType) throws Exception {
    final AutoDetectParser parser = createParser();
    final Metadata metadata = new Metadata();
    fillMetadata(parser, metadata, contentType, uri);

    // try-with-resources closes the stream even when parse() throws.
    try (TikaInputStream inputStream = createInputStream(uri, metadata)) {
        parser.parse(inputStream, new DefaultHandler(), metadata);
    }

    final Map<String, String[]> meta = new HashMap<>();
    for (String name : metadata.names()) {
        meta.put(name, metadata.getValues(name));
    }
    return new Gson().toJson(meta);
}
public static String detectCharset(String uri, String contentType) throws FileNotFoundException, IOException, TikaException { final Metadata metadata = new Metadata(); // Use metadata to provide type-hinting to the AutoDetectReader. fillMetadata(metadata, contentType, uri); final TikaInputStream inputStream = createInputStream(uri, metadata); // Detect the character set. final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata); String charset = reader.getCharset().toString(); inputStream.close(); return charset; }
public static String detectContentType(String uri) throws FileNotFoundException, IOException, TikaException { final Detector detector = config.getDetector(); final TikaInputStream inputStream = createInputStream(uri); final Metadata metadata = new Metadata(); // Set the file name. This provides some level of type-hinting. metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName()); // Detect the content type. String contentType = detector.detect(inputStream, metadata).toString(); inputStream.close(); // Return the default content-type if undetermined. if (contentType == null || contentType.isEmpty()) { return MediaType.OCTET_STREAM.toString(); } return contentType; }
stream.close();
/** * This method returns the mime type for the supplied file. We do not look at any part of the file * name, just at the innards of the file. * @param file the file whose mime type we are seeking * @return the mime type for the file * @throws IOException */ public static String detectMimeType(final File file) throws IOException { TikaInputStream tikaInputStream = null; try { tikaInputStream = TikaInputStream.get(file); final Metadata metadata = new Metadata(); // we don't want to include a check of mime type based on the // filename, but if we did we would uncomment the next line // metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName()); return DETECTOR.detect(tikaInputStream, metadata).toString(); } finally { if (tikaInputStream != null) { tikaInputStream.close(); } } }
/** * This method returns the mime type for the supplied file. We do not look at any part of the file * name, just at the innards of the file. * @param file the file whose mime type we are seeking * @return the mime type for the file * @throws IOException */ public static String detectMimeType(final File file) throws IOException { TikaInputStream tikaInputStream = null; try { tikaInputStream = TikaInputStream.get(file); final Metadata metadata = new Metadata(); // we don't want to include a check of mime type based on the // filename, but if we did we would uncomment the next line // metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName()); return DETECTOR.detect(tikaInputStream, metadata).toString(); } finally { if (tikaInputStream != null) { tikaInputStream.close(); } } }
} finally { if (embedded != null) { embedded.close();
/**
 * Builds metadata for an embedded resource from the non-null arguments and parses the
 * resource if the embedded-document utility accepts it. The resource is always closed.
 *
 * @param resource stream of the embedded resource; closed before this method returns
 * @param filename name of the embedded resource, or {@code null}
 * @param relationshipID relationship id of the embedded resource, or {@code null}
 * @param storageClassID storage class id of the embedded resource, or {@code null}
 * @param mediaType content type of the embedded resource, or {@code null}
 * @param xhtml handler passed to the embedded parse
 * @param outputHtml flag passed through to the embedded parse
 */
protected void handleEmbeddedResource(TikaInputStream resource, String filename,
        String relationshipID, ClassID storageClassID, String mediaType,
        XHTMLContentHandler xhtml, boolean outputHtml)
        throws IOException, SAXException, TikaException {
    try {
        final Metadata embeddedMetadata = new Metadata();

        if (filename != null) {
            embeddedMetadata.set(Metadata.TIKA_MIME_FILE, filename);
            embeddedMetadata.set(Metadata.RESOURCE_NAME_KEY, filename);
        }
        if (relationshipID != null) {
            embeddedMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, relationshipID);
        }
        if (storageClassID != null) {
            final String classIdText = storageClassID.toString();
            embeddedMetadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, classIdText);
        }
        if (mediaType != null) {
            embeddedMetadata.set(Metadata.CONTENT_TYPE, mediaType);
        }

        final boolean wanted = embeddedDocumentUtil.shouldParseEmbedded(embeddedMetadata);
        if (wanted) {
            embeddedDocumentUtil.parseEmbedded(resource, xhtml, embeddedMetadata, outputHtml);
        }
    } finally {
        resource.close();
    }
}