return DETECTOR.detect(tikaIS, metadata).toString(); } finally { if (tikaIS != null) {
@Override public void process(final InputStream stream) throws IOException { try (final InputStream in = new BufferedInputStream(stream)) { TikaInputStream tikaStream = TikaInputStream.get(in); Metadata metadata = new Metadata(); if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) { metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename); } // Get mime type MediaType mediatype = detector.detect(tikaStream, metadata); mimeTypeRef.set(mediatype.toString()); } } });
/**
 * Detects the media type of {@code stream} and prints its string form as a single
 * line to the writer obtained from {@code getOutputWriter(output, encoding)}.
 *
 * @param stream   the content to inspect
 * @param output   the destination for the printed media type
 * @param metadata metadata passed to the detector
 * @throws Exception if creating the writer or running detection fails
 */
@Override
public void process(InputStream stream, OutputStream output, Metadata metadata)
        throws Exception {
    // The writer is created before detection runs, matching the original evaluation order.
    PrintWriter out = new PrintWriter(getOutputWriter(output, encoding));
    String detectedType = detector.detect(stream, metadata).toString();
    out.println(detectedType);
    out.flush();
}
};
try (InputStream is = theInputStream; BufferedInputStream bis = new BufferedInputStream(is);) { AutoDetectParser parser = new AutoDetectParser(); Detector detector = parser.getDetector(); Metadata md = new Metadata(); md.add(Metadata.RESOURCE_NAME_KEY, theFileName); MediaType mediaType = detector.detect(bis, md); return mediaType.toString(); }
/**
 * Reports whether the media type detected for {@code input} is contained in the
 * parser's supported types.
 *
 * @param input the stream whose type is detected (with empty metadata)
 * @return {@code true} if the detected type is in the parser's supported types
 * @throws IOException if detection fails while reading the stream
 */
public boolean isSupported(TikaInputStream input) throws IOException {
    final Metadata noHints = new Metadata();
    final MediaType detected = detector.detect(input, noHints);
    final ParseContext parseContext = new ParseContext();
    return parser.getSupportedTypes(parseContext).contains(detected);
}
// Obtain the detector from Tika's default configuration.
TikaConfig config = TikaConfig.getDefaultConfig();
Detector detector = config.getDetector();
// Wrap the input for detection.
// NOTE(review): no close() of this stream is visible in this excerpt — confirm it is closed later.
TikaInputStream stream = TikaInputStream.get(fileOrStream);
// Pass the file name (including its extension) to the detector as metadata.
Metadata metadata = new Metadata();
metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension);
MediaType mediaType = detector.detect(stream, metadata);
/**
 * Runs the detector once over every file and records the detected media type per file.
 *
 * @param detector the detector used to produce the results
 * @param files    the files to detect
 * @return a map from each file that could be read to its detected media type
 */
private static ConcurrentHashMap<Path, MediaType> getBaselineDetection(Detector detector, Path[] files) {
    ConcurrentHashMap<Path, MediaType> baseline = new ConcurrentHashMap<>();
    for (Path f : files) {
        Metadata metadata = new Metadata();
        try (TikaInputStream tis = TikaInputStream.get(f, metadata)) {
            // Detect once per file; the result is stored under the file's path.
            baseline.put(f, detector.detect(tis, metadata));
        } catch (IOException e) {
            // Best effort: a file that fails with an IOException is reported and
            // left out of the baseline rather than aborting the whole run.
            e.printStackTrace();
        }
    }
    return baseline;
}
/**
 * Detects the media type of a classpath test resource using the detector of the
 * given configuration.
 *
 * @param testFileName resource name resolved against {@code MimeDetectionTest}
 * @param tikaConfig   configuration that supplies the detector
 * @return the media type reported by the detector
 * @throws Exception if detection or closing the resource fails
 */
private MediaType detect(String testFileName, TikaConfig tikaConfig) throws Exception {
    try (InputStream resource = MimeDetectionTest.class.getResourceAsStream(testFileName)) {
        Metadata noHints = new Metadata();
        MediaType detected = tikaConfig.getDetector().detect(resource, noHints);
        return detected;
    }
}
/**
 * Repeatedly picks a random file, detects its media type, and asserts that the
 * result equals the entry recorded for that file in {@code truth}.
 *
 * @return {@code 1} once all iterations have completed
 * @throws Exception if opening a file or running detection fails
 */
@Override
public Integer call() throws Exception {
    for (int round = 0; round < iterations; round++) {
        Path candidate = files[random.nextInt(files.length)];
        Metadata metadata = new Metadata();
        try (TikaInputStream input = TikaInputStream.get(candidate, metadata)) {
            MediaType detected = detector.detect(input, metadata);
            MediaType expected = truth.get(candidate);
            assertEquals("failed on: " + candidate.getFileName(), expected, detected);
        }
    }
    return 1;
}
/**
 * Asserts that the detector reports {@code type} for the given bytes.
 *
 * @param data the content handed to the detector
 * @param type the media type the detector is expected to return
 */
private void detect(byte[] data, MediaType type) {
    MediaType detected;
    try {
        detected = detector.detect(new ByteArrayInputStream(data), new Metadata());
    } catch (IOException e) {
        fail("Unexpected exception from ZeroSizeFileDetector");
        return; // not reached at runtime; needed so 'detected' is definitely assigned below
    }
    assertEquals(type, detected);
}
/**
 * Asserts that the detector reports {@code MediaType.OCTET_STREAM} for the given bytes.
 *
 * @param data the content handed to the detector
 */
private void assertNotText(byte[] data) {
    MediaType detected;
    try {
        detected = detector.detect(new ByteArrayInputStream(data), new Metadata());
    } catch (IOException e) {
        fail("Unexpected exception from TextDetector");
        return; // not reached at runtime; needed so 'detected' is definitely assigned below
    }
    assertEquals(MediaType.OCTET_STREAM, detected);
}
/**
 * Checks that a zero-length document is detected as {@code MediaType.OCTET_STREAM}.
 */
@Test
public void testEmptyDocument() throws IOException {
    ByteArrayInputStream emptyDocument = new ByteArrayInputStream(new byte[0]);
    MediaType detected = detector.detect(emptyDocument, new Metadata());
    assertEquals(MediaType.OCTET_STREAM, detected);
}
private void assertDetect(Detector detector, MediaType type, byte[] bytes) { try { InputStream stream = new ByteArrayInputStream(bytes); assertEquals(type, detector.detect(stream, new Metadata())); // Test that the stream has been reset for (int i = 0; i < bytes.length; i++) { assertEquals(bytes[i], (byte) stream.read()); } assertEquals(-1, stream.read()); } catch (IOException e) { fail("Unexpected exception from MagicDetector"); } }
/**
 * Checks that detection with a {@code null} stream yields {@code MediaType.OCTET_STREAM}.
 */
@Test
public void testDetectNull() throws Exception {
    MediaType detected = detector.detect(null, new Metadata());
    assertEquals(MediaType.OCTET_STREAM, detected);
}
/**
 * Checks that a {@code MagicDetector} built for {@code text/html} yields
 * {@code MediaType.OCTET_STREAM} when given a {@code null} stream.
 */
@Test
public void testDetectNull() throws Exception {
    MediaType htmlType = new MediaType("text", "html");
    byte[] htmlMagic = "<html".getBytes(US_ASCII);
    Detector htmlDetector = new MagicDetector(htmlType, htmlMagic);

    MediaType detected = htmlDetector.detect(null, new Metadata());
    assertEquals(MediaType.OCTET_STREAM, detected);
}
/**
 * Asserts that the detector reports {@code type} when given only a resource name
 * and no stream.
 *
 * @param type the expected media type
 * @param name the value stored under {@code TikaCoreProperties.RESOURCE_NAME_KEY}
 */
private void assertDetect(MediaType type, String name){
    Metadata nameOnly = new Metadata();
    nameOnly.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
    MediaType detected;
    try {
        detected = detector.detect(null, nameOnly);
    } catch (IOException e) {
        fail("NameDetector should never throw an IOException");
        return; // not reached at runtime; needed so 'detected' is definitely assigned below
    }
    assertEquals(type, detected);
}
}
/**
 * Asserts that the detector reports {@code type} when given only a content-type
 * metadata value and no stream.
 *
 * @param type the expected media type
 * @param name the value stored under {@code Metadata.CONTENT_TYPE}
 */
private void assertDetect(MediaType type, String name){
    Metadata contentTypeOnly = new Metadata();
    contentTypeOnly.set(Metadata.CONTENT_TYPE, name);
    MediaType detected;
    try {
        detected = detector.detect(null, contentTypeOnly);
    } catch (IOException e) {
        fail("TypeDetector should never throw an IOException");
        return; // not reached at runtime; needed so 'detected' is definitely assigned below
    }
    assertEquals(type, detected);
}
@Test public void testDetectStreamReadProblems() throws Exception { byte[] data = "abcdefghijklmnopqrstuvwxyz0123456789".getBytes(US_ASCII); MediaType testMT = new MediaType("application", "test"); Detector detector = new MagicDetector(testMT, data, null, false, 0, 0); // Deliberately prevent InputStream.read(...) from reading the entire // buffer in one go InputStream stream = new RestrictiveInputStream(data); assertEquals(testMT, detector.detect(stream, new Metadata())); }
@Test public void testDetectApplicationEnviHdr() throws Exception { InputStream iStream = MagicDetectorTest.class.getResourceAsStream( "/test-documents/ang20150420t182050_corr_v1e_img.hdr"); byte[] data = IOUtils.toByteArray(iStream); MediaType testMT = new MediaType("application", "envi.hdr"); Detector detector = new MagicDetector(testMT, data, null, false, 0, 0); // Deliberately prevent InputStream.read(...) from reading the entire // buffer in one go InputStream stream = new RestrictiveInputStream(data); assertEquals(testMT, detector.detect(stream, new Metadata())); }