/**
 * Detects the media type of an attachment from its content and its name.
 *
 * <p>The given stream is wrapped in a {@link BufferedInputStream} that is
 * closed before this method returns, which also closes {@code is}.
 *
 * @param is   stream holding the attachment content
 * @param name resource name, stored in the detection metadata under
 *             {@code Metadata.RESOURCE_NAME_KEY}
 * @return the string form of the detected media type, or {@code WILDCARD}
 *         when an {@link IOException} occurs
 */
public static String probeContentType(final InputStream is, final String name) {
    try (InputStream stream = new BufferedInputStream(is)) {
        final Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, name);
        return getDefaultMimeTypes().detect(stream, metadata).toString();
    } catch (IOException e) {
        // The exception is the trailing argument and has no placeholder of its
        // own, so the logger treats it as the throwable to report instead of
        // formatting it into the message (or leaving a stray "{}" behind).
        LOGGER.warn("Couldn't detect the media type of attachment {}", name, e);
        return WILDCARD;
    }
}
@Override public void process(final InputStream stream) throws IOException { try (final InputStream in = new BufferedInputStream(stream)) { TikaInputStream tikaStream = TikaInputStream.get(in); Metadata metadata = new Metadata(); if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) { metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename); } // Get mime type MediaType mediatype = detector.detect(tikaStream, metadata); mimeTypeRef.set(mediatype.toString()); } } });
/**
 * Gets the name of this media type.
 *
 * @return the media type name (lower case), taken from the string form of
 *         {@code type}
 */
public String getName() {
    final String name = type.toString();
    return name;
}
/**
 * Returns the name of this media type.
 *
 * @return media type name, i.e. the string form of {@code type}
 */
@Override
public String toString() {
    return type.toString();
}
/**
 * Builds a string array holding the string form of each given media type,
 * in the same order.
 *
 * @param aliases media types to convert
 * @return a new array of the same length as {@code aliases}
 */
private static String[] copyToStringArray(MediaType[] aliases) {
    String[] names = new String[aliases.length];
    int index = 0;
    for (MediaType alias : aliases) {
        names[index++] = alias.toString();
    }
    return names;
}
/**
 * Returns a space-separated description of this object: the media type
 * followed by {@code type}, {@code offset}, {@code value} and {@code mask}.
 *
 * @return the fields joined by single spaces
 */
@Override
public String toString() {
    // Plain concatenation is kept on purpose so that every field is rendered
    // through the standard string conversion, whatever its declared type.
    return mediaType.toString()
            + " " + type
            + " " + offset
            + " " + value
            + " " + mask;
}
/**
 * Prints the media type detected for the given stream as a single line on
 * the output.
 *
 * @param stream   content to run detection on
 * @param output   destination the detected type is written to
 * @param metadata metadata passed to the detector
 * @throws Exception if obtaining the writer or detection fails
 */
@Override
public void process(InputStream stream, OutputStream output, Metadata metadata)
        throws Exception {
    PrintWriter out = new PrintWriter(getOutputWriter(output, encoding));
    String detected = detector.detect(stream, metadata).toString();
    out.println(detected);
    // The writer is flushed but not closed here.
    out.flush();
}
};
/**
 * Returns the result of {@code IOUtil.getMimeType} applied to the string
 * form of {@code type}.
 *
 * @see ImportContext#getMimeType()
 */
public String getMimeType() {
    final String contentType = type.toString();
    return IOUtil.getMimeType(contentType);
}
/**
 * Returns the result of {@code IOUtil.getEncoding} applied to the string
 * form of {@code type}.
 *
 * @see ImportContext#getEncoding()
 */
public String getEncoding() {
    final String contentType = type.toString();
    return IOUtil.getEncoding(contentType);
}
/**
 * Stores the string form of {@code MEDIA_TYPE} in the metadata under
 * {@code Metadata.CONTENT_TYPE}.
 *
 * @param metadata metadata object to update
 */
@Override
public void setContentType(Metadata metadata) {
    final String contentType = MEDIA_TYPE.toString();
    metadata.set(Metadata.CONTENT_TYPE, contentType);
}
/**
 * Records the string form of {@code MEDIA_TYPE} as the
 * {@code Metadata.CONTENT_TYPE} value of the given metadata.
 *
 * @param metadata metadata object to update
 */
@Override
public void setContentType(Metadata metadata) {
    final String mediaTypeName = MEDIA_TYPE.toString();
    metadata.set(Metadata.CONTENT_TYPE, mediaTypeName);
}
/**
 * Stores the string form of the given media type in the metadata under
 * {@code Metadata.CONTENT_TYPE}.
 *
 * @param metadata metadata object to update
 * @param type     media type to record
 */
private void setType(Metadata metadata, MediaType type) {
    final String contentType = type.toString();
    metadata.set(Metadata.CONTENT_TYPE, contentType);
}
/**
 * Checks the string form of a media type whose parameter values contain
 * spaces and special characters: the expected output quotes and escapes
 * those values.
 */
@Test
public void testQuote() {
    final Map<String, String> params = new HashMap<String, String>();
    params.put("a", " value with spaces ");
    params.put("b", "text/plain");
    params.put("c", "()<>@,;:\\\"/[]?=");

    final String expected =
            "text/plain; a=\" value with spaces \"; b=\"text\\/plain\""
            + "; c=\"\\(\\)\\<\\>\\@\\,\\;\\:\\\\\\\"\\/\\[\\]\\?\\=\"";
    final String actual = new MediaType("text", "plain", params).toString();

    assertEquals(expected, actual);
}
/**
 * Checks that text starting with a byte order mark is detected as plain
 * text when encoded as UTF-16LE, UTF-16BE and UTF-8.
 */
@Test
public void testByteOrderMark() throws Exception {
    final String expected = MediaType.TEXT_PLAIN.toString();
    final String text = "\ufefftest";

    assertEquals(expected,
            tika.detect(new ByteArrayInputStream(text.getBytes(UTF_16LE)), new Metadata()));
    assertEquals(expected,
            tika.detect(new ByteArrayInputStream(text.getBytes(UTF_16BE)), new Metadata()));
    assertEquals(expected,
            tika.detect(new ByteArrayInputStream(text.getBytes(UTF_8)), new Metadata()));
}
/**
 * Covers content such as javascript files that is wrapped in XML comment
 * delimiters without actually being XML; it must be detected as plain text.
 *
 * @see <a
 *      href="https://issues.apache.org/jira/browse/TIKA-426">TIKA-426</a>
 */
@Test
public void testNotXML() throws IOException {
    final byte[] content = "<!-- test -->".getBytes(UTF_8);
    final String expected = MediaType.TEXT_PLAIN.toString();
    final String detected = tika.detect(new ByteArrayInputStream(content), new Metadata());
    assertEquals(expected, detected);
}
/**
 * Passes the WMF data carried by a comment record to the embedded document
 * extractor, provided the extractor agrees to parse it.
 *
 * @param comment                   comment record supplying the WMF input stream
 * @param contentHandler            handler wrapped in an {@code EmbeddedContentHandler}
 *                                  for the embedded parse
 * @param embeddedDocumentExtractor extractor that decides on and performs the parse
 * @throws IOException   if reading or closing the WMF stream fails
 * @throws SAXException  if the embedded parse reports a SAX error
 * @throws TikaException if obtaining the WMF data or parsing it fails
 */
private void handleWMF(HemfCommentPublic.WindowsMetafile comment,
                       ContentHandler contentHandler,
                       EmbeddedDocumentExtractor embeddedDocumentExtractor)
        throws IOException, SAXException, TikaException {
    Metadata wmfMetadata = new Metadata();
    wmfMetadata.set(Metadata.CONTENT_TYPE, WMF_MEDIA_TYPE.toString());

    // Nothing to do when the extractor declines this embedded document.
    if (!embeddedDocumentExtractor.shouldParseEmbedded(wmfMetadata)) {
        return;
    }

    try (InputStream wmfStream = TikaInputStream.get(comment.getWmfInputStream())) {
        embeddedDocumentExtractor.parseEmbedded(
                wmfStream,
                new EmbeddedContentHandler(contentHandler),
                wmfMetadata,
                false);
    }
}
/**
 * Verifies that the file described by the prefix area can be handled.
 *
 * <p>A major version other than {@code WP5_MAJOR_VERSION} or
 * {@code WP6_MAJOR_VERSION} is rejected after the content type in the
 * metadata has been set to {@code WP_UNK}. Encrypted files are rejected
 * as well.
 *
 * @param pa       prefix area of the file being checked
 * @param metadata metadata updated when the major version is unrecognized
 * @throws UnsupportedFormatException if the major version is not recognized
 * @throws EncryptedDocumentException if the prefix area reports encryption
 */
private void ensureFileSupport(WPPrefixArea pa, Metadata metadata)
        throws UnsupportedFormatException, EncryptedDocumentException {
    final boolean knownMajorVersion =
            pa.getMajorVersion() == WPPrefixArea.WP5_MAJOR_VERSION
            || pa.getMajorVersion() == WPPrefixArea.WP6_MAJOR_VERSION;

    if (!knownMajorVersion) {
        metadata.set(Metadata.CONTENT_TYPE, WP_UNK.toString());
        throw new UnsupportedFormatException(
                "Parser doesn't recognize this major version: " + pa.getMajorVersion());
    }

    if (pa.isEncrypted()) {
        throw new EncryptedDocumentException();
    }
}
/**
 * Asserts that detection based only on a resource name (no content stream)
 * yields the expected media type.
 *
 * @param expected expected media type string
 * @param url      resource name placed in the metadata
 * @throws IOException if detection fails
 */
private void testUrlWithoutContent(String expected, String url) throws IOException {
    Metadata nameOnly = new Metadata();
    nameOnly.set(TikaCoreProperties.RESOURCE_NAME_KEY, url);

    String detected = this.mimeTypes.detect(null, nameOnly).toString();

    String failureMessage = url + " is not properly detected using only resource name";
    assertEquals(failureMessage, expected, detected);
}
/**
 * TIKA-2460 Test loading of custom-mimetypes.xml from sys prop.
 *
 * <p>The system property is JVM-wide, so its previous value is restored
 * once the test has finished to keep it from affecting other tests.
 */
@Test
public void testExternalMimeTypes() throws Exception {
    final String previous = System.getProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP);
    System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP,
            "src/test/resources/org/apache/tika/mime/external-mimetypes.xml");
    try {
        MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader());
        Metadata m = new Metadata();
        m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "test.external.mime.type");
        assertEquals("external/mime-type", mimeTypes.detect(null, m).toString());
    } finally {
        // Put the property back exactly as it was before the test ran.
        if (previous == null) {
            System.clearProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP);
        } else {
            System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP, previous);
        }
    }
}