Refine search
@Override public void process(final InputStream stream) throws IOException { try (final InputStream in = new BufferedInputStream(stream)) { TikaInputStream tikaStream = TikaInputStream.get(in); Metadata metadata = new Metadata(); if (filename != null && context.getProperty(USE_FILENAME_IN_DETECTION).asBoolean()) { metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename); } // Get mime type MediaType mediatype = detector.detect(tikaStream, metadata); mimeTypeRef.set(mediatype.toString()); } } });
try (TikaInputStream stream = TikaInputStream.get( new DocumentInputStream((DocumentEntry) ooxml))) { ZipContainerDetector detector = new ZipContainerDetector(); try { handleEmbeddedResource(stream, null, dir.getName(), dir.getStorageClsid(), type.toString(), xhtml, true); return; embedded = TikaInputStream.get(data); } catch (Ole10NativeException ex) { byte[] contents = new byte[contentsEntry.getSize()]; inp.readFully(contents); embedded = TikaInputStream.get(contents); MediaType mediaType = getDetector().detect(embedded, new Metadata()); String extension = type.getExtension(); try { MimeType mimeType = getMimeTypes().forName(mediaType.toString()); extension = mimeType.getExtension(); } catch (MimeTypeException mte) { metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString()); metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, rName + extension); } catch (Exception e) {
Metadata metadata = new Metadata(); metadata.add( Metadata.RESOURCE_NAME_KEY, inputFile.toURI().toString()); InputStream stream = TikaInputStream.get(inputFile); MediaType type = parser.getDetector().detect(stream, metadata); System.out.println("Detector found: "+type); metadata.add( Metadata.CONTENT_TYPE, type.toString());
TikaConfig config = TikaConfig.getDefaultConfig(); Detector detector = config.getDetector(); TikaInputStream stream = TikaInputStream.get(fileOrStream); Metadata metadata = new Metadata(); metadata.add(Metadata.RESOURCE_NAME_KEY, filenameWithExtension); MediaType mediaType = detector.detect(stream, metadata);
public String getExtension(TikaInputStream is, Metadata metadata) { String mimeString = metadata.get(Metadata.CONTENT_TYPE); MediaType mediaType = getDetector().detect(is, metadata); mimeType = localMimeTypes.forName(mediaType.toString()); detected = true; is.reset(); } catch (IOException e) { if (detected) { metadata.set(Metadata.CONTENT_TYPE, mimeType.toString());
Metadata metadata = new Metadata(); MediaType mediaType = MediaType.OCTET_STREAM; try { mediaType = detector.detect( bais, metadata ); mediaType = detector.detect( fis, metadata ); return mediaType.toString(); fileMetadata.put( AssetUtils.CONTENT_TYPE, mediaType.toString() ); return mediaType.toString();
TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tis = TikaInputStream.get(stream, tmp); MediaType type = detector.detect(tis, metadata); metadata.set(Metadata.CONTENT_TYPE, type.toString()); if (tis.getOpenContainer() == null) { tis.mark(1); if (tis.read() == -1) { throw new ZeroByteFileException("InputStream must have > 0 bytes");
/**
 * Builds the baseline detection result for every file by running the given
 * detector once per file.
 * <p>
 * A file whose detection fails with an {@link IOException} has its stack trace
 * printed and gets no entry in the returned map.
 *
 * @param detector the detector used to produce the baseline
 * @param files    the files to detect
 * @return map from each successfully detected file to its detected media type
 */
private static ConcurrentHashMap<Path, MediaType> getBaselineDetection(Detector detector, Path[] files) {
    ConcurrentHashMap<Path, MediaType> baseline = new ConcurrentHashMap<>();
    for (Path f : files) {
        Metadata metadata = new Metadata();
        try (TikaInputStream tis = TikaInputStream.get(f, metadata)) {
            // One detection per file is enough: the former second, identical
            // detect/put pair only overwrote the same key with the same lookup.
            baseline.put(f, detector.detect(tis, metadata));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return baseline;
}
public void parse( InputStream stream, ContentHandler ignored, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { TemporaryResources tmp = new TemporaryResources(); try { TikaInputStream tis = TikaInputStream.get(stream, tmp); // Figure out what we have to process String filename = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY); MediaType type = detector.detect(tis, metadata); if (extractor == null) { // Let the handler process the embedded resource handler.handle(filename, type, tis); } else { // Use a temporary file to process the stream twice File file = tis.getFile(); // Let the handler process the embedded resource try (InputStream input = TikaInputStream.get(file)) { handler.handle(filename, type, input); } // Recurse extractor.extract(tis, extractor, handler); } } finally { tmp.dispose(); } }
/**
 * Getter for the content type. If it is not set or is
 * {@link MediaType#OCTET_STREAM}, then the media type is detected from the
 * content stream, with the item's URI supplied to the detector as resource name.
 * <p>
 * This method returns the media type together with the stream used to detect
 * it, which allows callers to reuse both. The stream is only assigned when
 * detection was attempted. If detection fails with an {@link IOException}, the
 * failure is logged, the stream is closed and reset to {@code null}, and the
 * media type keeps the value it had before detection.
 *
 * @param ci the content item whose media type is required
 * @return holder carrying the media type, the item's URI and, when detection
 *         was attempted and succeeded, the stream used for it
 */
private MediaTypeAndStream extractMediaType(ContentItem ci) {
    MediaTypeAndStream mtas = new MediaTypeAndStream();
    mtas.mediaType = getMediaType(ci.getBlob());
    mtas.uri = ci.getUri().getUnicodeString();
    if (mtas.mediaType == null || mtas.mediaType.equals(MediaType.OCTET_STREAM)) {
        mtas.in = new BufferedInputStream(ci.getStream());
        Metadata m = new Metadata();
        m.add(Metadata.RESOURCE_NAME_KEY, mtas.uri);
        try {
            mtas.mediaType = detector.detect(mtas.in, m);
        } catch (IOException e) {
            // The two literals were previously joined without a separating
            // space, producing "of theparsed ContentItem" in the log.
            log.warn("Exception while detection the MediaType of the "
                    + "parsed ContentItem " + ci.getUri(), e);
            IOUtils.closeQuietly(mtas.in);
            mtas.in = null;
        }
    }
    return mtas;
}
public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException { String ct = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE); //make sure never to return null -- TIKA-1845 MediaType type = null; if (ct != null) { //this can return null if ct is not a valid mime type type = MediaType.parse(ct); } if (type != null) { return type; } else { return detector.detect(inputStream, metadata); } } });
assertDetect(MediaType.OCTET_STREAM, "See README"); // but not this assertDetect(MediaType.application("envi.hdr"), "ang20150420t182050_corr_v1e_img.hdr"); assertEquals( MediaType.OCTET_STREAM, detector.detect(null, new Metadata())); } catch (IOException e) { fail("NameDetector should never throw an IOException");
public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean outputHtml) throws SAXException, IOException { inputStream = TikaInputStream.get(inputStream); MediaType contentType = detector.detect(inputStream, metadata); String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY); File outputFile = null; if (name == null) { TikaInputStream tin = (TikaInputStream) inputStream; if (tin.getOpenContainer() != null && tin.getOpenContainer() instanceof DirectoryEntry) { POIFSFileSystem fs = new POIFSFileSystem(); copy((DirectoryEntry) tin.getOpenContainer(), fs.getRoot());
public MediaType detect(InputStream input, Metadata metadata) throws IOException { MediaType type = MediaType.OCTET_STREAM; for (Detector detector : getDetectors()) { //short circuit via OverrideDetector //can't rely on ordering because subsequent detector may //change Override's to a specialization of Override's if (detector instanceof OverrideDetector && metadata.get(TikaCoreProperties.CONTENT_TYPE_OVERRIDE) != null) { return detector.detect(input, metadata); } MediaType detected = detector.detect(input, metadata); if (registry.isSpecializationOf(detected, type)) { type = detected; } } return type; }